summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2022-03-01 12:49:54 +0000
committerGitHub <noreply@github.com>2022-03-01 12:49:54 +0000
commite2e1d90a5e4030616a3de242cde26c0cfff4a6b5 (patch)
tree80b848f98f963ed24fc46ca1d6f191ff04f4d9e8 /synapse/storage
parentAdvertise Python 3.10 support in setup.py (#12111) (diff)
downloadsynapse-e2e1d90a5e4030616a3de242cde26c0cfff4a6b5.tar.xz
Faster joins: persist to database (#12012)
When we get a partial_state response from send_join, store information in the
database about it:
 * store a record about the room as a whole having partial state, and stash the
   list of member servers too.
 * flag the join event itself as having partial state
 * also, for any new events whose prev-events are partial-stated, note that
   they will *also* be partial-stated.

We don't yet make any attempt to interpret this data, so API calls (and a bunch
of other things) are just going to get incorrect data.
Diffstat (limited to 'synapse/storage')
-rw-r--r--synapse/storage/databases/main/events.py25
-rw-r--r--synapse/storage/databases/main/events_worker.py28
-rw-r--r--synapse/storage/databases/main/room.py37
-rw-r--r--synapse/storage/schema/main/delta/68/04partial_state_rooms.sql41
-rw-r--r--synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py72
5 files changed, 203 insertions, 0 deletions
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 23fa089bca..ca2a9ba9d1 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2145,6 +2145,14 @@ class PersistEventsStore:
         state_groups = {}
         for event, context in events_and_contexts:
             if event.internal_metadata.is_outlier():
+                # double-check that we don't have any events that claim to be outliers
+                # *and* have partial state (which is meaningless: we should have no
+                # state at all for an outlier)
+                if context.partial_state:
+                    raise ValueError(
+                        "Outlier event %s claims to have partial state", event.event_id
+                    )
+
                 continue
 
             # if the event was rejected, just give it the same state as its
@@ -2155,6 +2163,23 @@ class PersistEventsStore:
 
             state_groups[event.event_id] = context.state_group
 
+        # if we have partial state for these events, record the fact. (This happens
+        # here rather than in _store_event_txn because it also needs to happen when
+        # we de-outlier an event.)
+        self.db_pool.simple_insert_many_txn(
+            txn,
+            table="partial_state_events",
+            keys=("room_id", "event_id"),
+            values=[
+                (
+                    event.room_id,
+                    event.event_id,
+                )
+                for event, ctx in events_and_contexts
+                if ctx.partial_state
+            ],
+        )
+
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_to_state_groups",
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 2a255d1031..26784f755e 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1953,3 +1953,31 @@ class EventsWorkerStore(SQLBaseStore):
             "get_event_id_for_timestamp_txn",
             get_event_id_for_timestamp_txn,
         )
+
+    @cachedList("is_partial_state_event", list_name="event_ids")
+    async def get_partial_state_events(
+        self, event_ids: Collection[str]
+    ) -> Dict[str, bool]:
+        """Checks which of the given events have partial state"""
+        result = await self.db_pool.simple_select_many_batch(
+            table="partial_state_events",
+            column="event_id",
+            iterable=event_ids,
+            retcols=["event_id"],
+            desc="get_partial_state_events",
+        )
+        # convert the result to a dict, to make @cachedList work
+        partial = {r["event_id"] for r in result}
+        return {e_id: e_id in partial for e_id in event_ids}
+
+    @cached()
+    async def is_partial_state_event(self, event_id: str) -> bool:
+        """Checks if the given event has partial state"""
+        result = await self.db_pool.simple_select_one_onecol(
+            table="partial_state_events",
+            keyvalues={"event_id": event_id},
+            retcol="1",
+            allow_none=True,
+            desc="is_partial_state_event",
+        )
+        return result is not None
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 0416df64ce..94068940b9 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -20,6 +20,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Awaitable,
+    Collection,
     Dict,
     List,
     Optional,
@@ -1543,6 +1544,42 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             lock=False,
         )
 
+    async def store_partial_state_room(
+        self,
+        room_id: str,
+        servers: Collection[str],
+    ) -> None:
+        """Mark the given room as containing events with partial state
+
+        Args:
+            room_id: the ID of the room
+            servers: other servers known to be in the room
+        """
+        await self.db_pool.runInteraction(
+            "store_partial_state_room",
+            self._store_partial_state_room_txn,
+            room_id,
+            servers,
+        )
+
+    @staticmethod
+    def _store_partial_state_room_txn(
+        txn: LoggingTransaction, room_id: str, servers: Collection[str]
+    ) -> None:
+        DatabasePool.simple_insert_txn(
+            txn,
+            table="partial_state_rooms",
+            values={
+                "room_id": room_id,
+            },
+        )
+        DatabasePool.simple_insert_many_txn(
+            txn,
+            table="partial_state_rooms_servers",
+            keys=("room_id", "server_name"),
+            values=((room_id, s) for s in servers),
+        )
+
     async def maybe_store_room_on_outlier_membership(
         self, room_id: str, room_version: RoomVersion
     ) -> None:
diff --git a/synapse/storage/schema/main/delta/68/04partial_state_rooms.sql b/synapse/storage/schema/main/delta/68/04partial_state_rooms.sql
new file mode 100644
index 0000000000..815c0cc390
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/04partial_state_rooms.sql
@@ -0,0 +1,41 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- rooms which we have done a partial-state-style join to
+CREATE TABLE IF NOT EXISTS partial_state_rooms (
+    room_id TEXT PRIMARY KEY,
+    FOREIGN KEY(room_id) REFERENCES rooms(room_id)
+);
+
+-- a list of remote servers we believe are in the room
+CREATE TABLE IF NOT EXISTS partial_state_rooms_servers (
+    room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id),
+    server_name TEXT NOT NULL,
+    UNIQUE(room_id, server_name)
+);
+
+-- a list of events with partial state. We can't store this in the `events` table
+-- itself, because `events` is meant to be append-only.
+CREATE TABLE IF NOT EXISTS partial_state_events (
+    -- the room_id is denormalised for efficient indexing (the canonical source is `events`)
+    room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id),
+    event_id TEXT NOT NULL REFERENCES events(event_id),
+    UNIQUE(event_id)
+);
+
+CREATE INDEX IF NOT EXISTS partial_state_events_room_id_idx
+     ON partial_state_events (room_id);
+
+
diff --git a/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py b/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py
new file mode 100644
index 0000000000..a2ec4fc26e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py
@@ -0,0 +1,72 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This migration adds triggers to the partial_state_events tables to enforce uniqueness
+
+Triggers cannot be expressed in .sql files, so we have to use a separate file.
+"""
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
+from synapse.storage.types import Cursor
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    # complain if the room_id in partial_state_events doesn't match
+    # that in `events`. We already have a fk constraint which ensures that the event
+    # exists in `events`, so all we have to do is raise if there is a row with a
+    # matching stream_ordering but not a matching room_id.
+    if isinstance(database_engine, Sqlite3Engine):
+        cur.execute(
+            """
+            CREATE TRIGGER IF NOT EXISTS partial_state_events_bad_room_id
+            BEFORE INSERT ON partial_state_events
+            FOR EACH ROW
+            BEGIN
+                SELECT RAISE(ABORT, 'Incorrect room_id in partial_state_events')
+                WHERE EXISTS (
+                    SELECT 1 FROM events
+                    WHERE events.event_id = NEW.event_id
+                       AND events.room_id != NEW.room_id
+                );
+            END;
+            """
+        )
+    elif isinstance(database_engine, PostgresEngine):
+        cur.execute(
+            """
+            CREATE OR REPLACE FUNCTION check_partial_state_events() RETURNS trigger AS $BODY$
+            BEGIN
+                IF EXISTS (
+                    SELECT 1 FROM events
+                    WHERE events.event_id = NEW.event_id
+                       AND events.room_id != NEW.room_id
+                ) THEN
+                    RAISE EXCEPTION 'Incorrect room_id in partial_state_events';
+                END IF;
+                RETURN NEW;
+            END;
+            $BODY$ LANGUAGE plpgsql;
+            """
+        )
+
+        cur.execute(
+            """
+            CREATE TRIGGER check_partial_state_events BEFORE INSERT OR UPDATE ON partial_state_events
+            FOR EACH ROW
+            EXECUTE PROCEDURE check_partial_state_events()
+            """
+        )
+    else:
+        raise NotImplementedError("Unknown database engine")