summary refs log tree commit diff
diff options
context:
space:
mode:
authorSean Quah <8349537+squahtx@users.noreply.github.com>2022-02-24 11:52:28 +0000
committerGitHub <noreply@github.com>2022-02-24 11:52:28 +0000
commit41cf4c2cf6432336cc7477f130a2847449cff99a (patch)
treecdec629565287e2f8bf5dab212b0ddafdc12c6be
parentAdd documentation for missing worker types. (#11599) (diff)
downloadsynapse-41cf4c2cf6432336cc7477f130a2847449cff99a.tar.xz
Fix non-strings in the `event_search` table (#12037)
Don't attempt to add non-string `value`s to `event_search` and add a
background update to clear out bad rows from `event_search` when
using sqlite.

Signed-off-by: Sean Quah <seanq@element.io>
Diffstat (limited to '')
-rw-r--r--changelog.d/12037.bugfix1
-rw-r--r--synapse/storage/databases/main/events.py18
-rw-r--r--synapse/storage/databases/main/search.py26
-rw-r--r--synapse/storage/schema/main/delta/68/05_delete_non_strings_from_event_search.sql.sqlite22
-rw-r--r--tests/storage/test_room_search.py117
5 files changed, 173 insertions, 11 deletions
diff --git a/changelog.d/12037.bugfix b/changelog.d/12037.bugfix
new file mode 100644
index 0000000000..9295cb4dc0
--- /dev/null
+++ b/changelog.d/12037.bugfix
@@ -0,0 +1 @@
+Properly fix a long-standing bug where wrong data could be inserted in the `event_search` table when using sqlite. This could block running `synapse_port_db` with an "argument of type 'int' is not iterable" error. This bug was partially fixed by a change in Synapse 1.44.0.
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index a1d7a9b413..e53e84054a 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1473,10 +1473,10 @@ class PersistEventsStore:
 
     def _update_metadata_tables_txn(
         self,
-        txn,
+        txn: LoggingTransaction,
         *,
-        events_and_contexts,
-        all_events_and_contexts,
+        events_and_contexts: List[Tuple[EventBase, EventContext]],
+        all_events_and_contexts: List[Tuple[EventBase, EventContext]],
         inhibit_local_membership_updates: bool = False,
     ):
         """Update all the miscellaneous tables for new events
@@ -1953,20 +1953,20 @@ class PersistEventsStore:
             txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
         )
 
-    def _store_room_topic_txn(self, txn, event):
-        if hasattr(event, "content") and "topic" in event.content:
+    def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase):
+        if isinstance(event.content.get("topic"), str):
             self.store_event_search_txn(
                 txn, event, "content.topic", event.content["topic"]
             )
 
-    def _store_room_name_txn(self, txn, event):
-        if hasattr(event, "content") and "name" in event.content:
+    def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase):
+        if isinstance(event.content.get("name"), str):
             self.store_event_search_txn(
                 txn, event, "content.name", event.content["name"]
             )
 
-    def _store_room_message_txn(self, txn, event):
-        if hasattr(event, "content") and "body" in event.content:
+    def _store_room_message_txn(self, txn: LoggingTransaction, event: EventBase):
+        if isinstance(event.content.get("body"), str):
             self.store_event_search_txn(
                 txn, event, "content.body", event.content["body"]
             )
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index acea300ed3..e23b119072 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -115,6 +115,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
     EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
     EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
     EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
+    EVENT_SEARCH_DELETE_NON_STRINGS = "event_search_sqlite_delete_non_strings"
 
     def __init__(
         self,
@@ -147,6 +148,10 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
             self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            self.EVENT_SEARCH_DELETE_NON_STRINGS, self._background_delete_non_strings
+        )
+
     async def _background_reindex_search(self, progress, batch_size):
         # we work through the events table from highest stream id to lowest
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
@@ -372,6 +377,27 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
 
         return num_rows
 
+    async def _background_delete_non_strings(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """Deletes rows with non-string `value`s from `event_search` if using sqlite.
+
+        Prior to Synapse 1.44.0, malformed events received over federation could cause integers
+        to be inserted into the `event_search` table when using sqlite.
+        """
+
+        def delete_non_strings_txn(txn: LoggingTransaction) -> None:
+            txn.execute("DELETE FROM event_search WHERE typeof(value) != 'text'")
+
+        await self.db_pool.runInteraction(
+            self.EVENT_SEARCH_DELETE_NON_STRINGS, delete_non_strings_txn
+        )
+
+        await self.db_pool.updates._end_background_update(
+            self.EVENT_SEARCH_DELETE_NON_STRINGS
+        )
+        return 1
+
 
 class SearchStore(SearchBackgroundUpdateStore):
     def __init__(
diff --git a/synapse/storage/schema/main/delta/68/05_delete_non_strings_from_event_search.sql.sqlite b/synapse/storage/schema/main/delta/68/05_delete_non_strings_from_event_search.sql.sqlite
new file mode 100644
index 0000000000..140df65264
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/05_delete_non_strings_from_event_search.sql.sqlite
@@ -0,0 +1,22 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- Delete rows with non-string `value`s from `event_search` if using sqlite.
+--
+-- Prior to Synapse 1.44.0, malformed events received over federation could
+-- cause integers to be inserted into the `event_search` table.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (6805, 'event_search_sqlite_delete_non_strings', '{}');
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index befaa0fcee..d62e01726c 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -13,13 +13,16 @@
 # limitations under the License.
 
 import synapse.rest.admin
+from synapse.api.constants import EventTypes
+from synapse.api.errors import StoreError
 from synapse.rest.client import login, room
 from synapse.storage.engines import PostgresEngine
 
-from tests.unittest import HomeserverTestCase
+from tests.unittest import HomeserverTestCase, skip_unless
+from tests.utils import USE_POSTGRES_FOR_TESTS
 
 
-class NullByteInsertionTest(HomeserverTestCase):
+class EventSearchInsertionTest(HomeserverTestCase):
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
@@ -72,3 +75,113 @@ class NullByteInsertionTest(HomeserverTestCase):
         )
         if isinstance(store.database_engine, PostgresEngine):
             self.assertIn("alice", result.get("highlights"))
+
+    def test_non_string(self):
+        """Test that non-string `value`s are not inserted into `event_search`.
+
+        This is particularly important when using sqlite, since a sqlite column can hold
+        both strings and integers. When using Postgres, integers are automatically
+        converted to strings.
+
+        Regression test for #11918.
+        """
+        store = self.hs.get_datastores().main
+
+        # Register a user and create a room
+        user_id = self.register_user("alice", "password")
+        access_token = self.login("alice", "password")
+        room_id = self.helper.create_room_as("alice", tok=access_token)
+        room_version = self.get_success(store.get_room_version(room_id))
+
+        # Construct a message with a numeric body to be received over federation
+        # The message can't be sent using the client API, since Synapse's event
+        # validation will reject it.
+        prev_event_ids = self.get_success(store.get_prev_events_for_room(room_id))
+        prev_event = self.get_success(store.get_event(prev_event_ids[0]))
+        prev_state_map = self.get_success(
+            self.hs.get_storage().state.get_state_ids_for_event(prev_event_ids[0])
+        )
+
+        event_dict = {
+            "type": EventTypes.Message,
+            "content": {"msgtype": "m.text", "body": 2},
+            "room_id": room_id,
+            "sender": user_id,
+            "depth": prev_event.depth + 1,
+            "prev_events": prev_event_ids,
+            "origin_server_ts": self.clock.time_msec(),
+        }
+        builder = self.hs.get_event_builder_factory().for_room_version(
+            room_version, event_dict
+        )
+        event = self.get_success(
+            builder.build(
+                prev_event_ids=prev_event_ids,
+                auth_event_ids=self.hs.get_event_auth_handler().compute_auth_events(
+                    builder,
+                    prev_state_map,
+                    for_verification=False,
+                ),
+                depth=event_dict["depth"],
+            )
+        )
+
+        # Receive the event
+        self.get_success(
+            self.hs.get_federation_event_handler().on_receive_pdu(
+                self.hs.hostname, event
+            )
+        )
+
+        # The event should not have an entry in the `event_search` table
+        f = self.get_failure(
+            store.db_pool.simple_select_one_onecol(
+                "event_search",
+                {"room_id": room_id, "event_id": event.event_id},
+                "event_id",
+            ),
+            StoreError,
+        )
+        self.assertEqual(f.value.code, 404)
+
+    @skip_unless(not USE_POSTGRES_FOR_TESTS, "requires sqlite")
+    def test_sqlite_non_string_deletion_background_update(self):
+        """Test the background update to delete bad rows from `event_search`."""
+        store = self.hs.get_datastores().main
+
+        # Populate `event_search` with dummy data
+        self.get_success(
+            store.db_pool.simple_insert_many(
+                "event_search",
+                keys=["event_id", "room_id", "key", "value"],
+                values=[
+                    ("event1", "room_id", "content.body", "hi"),
+                    ("event2", "room_id", "content.body", "2"),
+                    ("event3", "room_id", "content.body", 3),
+                ],
+                desc="populate_event_search",
+            )
+        )
+
+        # Run the background update
+        store.db_pool.updates._all_done = False
+        self.get_success(
+            store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": "event_search_sqlite_delete_non_strings",
+                    "progress_json": "{}",
+                },
+            )
+        )
+        self.wait_for_background_updates()
+
+        # The non-string `value`s ought to be gone now.
+        values = self.get_success(
+            store.db_pool.simple_select_onecol(
+                "event_search",
+                {"room_id": "room_id"},
+                "value",
+            ),
+        )
+        self.assertCountEqual(values, ["hi", "2"])