summary refs log tree commit diff
diff options
context:
space:
mode:
authorreivilibre <oliverw@matrix.org>2023-11-13 16:03:22 +0000
committerGitHub <noreply@github.com>2023-11-13 16:03:22 +0000
commit69afe3f7a0d89f3422ddbd3aa16bc9bbc01056eb (patch)
treed6566d760c55a06560cd1af71fcb66a7d88e2707
parentFix outbound_federation_restricted_to docs & note when added (#16628) (diff)
downloadsynapse-69afe3f7a0d89f3422ddbd3aa16bc9bbc01056eb.tar.xz
Add a Postgres `REPLICA IDENTITY` to tables that do not have an implicit one. This should allow use of Postgres logical replication. (#16456)
* Add Postgres replica identities to tables that don't have an implicit one

Fixes #16224

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Move the delta to version 83 as we missed the boat for 82

* Add a test that all tables have a REPLICA IDENTITY

* Extend the test to include when indices are deleted

* isort

* black

* Fully qualify `oid` as it is a 'hidden attribute' in Postgres 11

* Update tests/storage/test_database.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Add missed tables

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
-rw-r--r--changelog.d/16456.misc1
-rw-r--r--synapse/storage/schema/main/delta/83/04_replica_identities.sql.postgres88
-rw-r--r--synapse/storage/schema/state/delta/83/05_replica_identities_in_state_db.sql.postgres30
-rw-r--r--tests/storage/test_database.py84
4 files changed, 202 insertions, 1 deletions
diff --git a/changelog.d/16456.misc b/changelog.d/16456.misc
new file mode 100644
index 0000000000..baee042f2f
--- /dev/null
+++ b/changelog.d/16456.misc
@@ -0,0 +1 @@
+Add a Postgres `REPLICA IDENTITY` to tables that do not have an implicit one. This should allow use of Postgres logical replication.
\ No newline at end of file
diff --git a/synapse/storage/schema/main/delta/83/04_replica_identities.sql.postgres b/synapse/storage/schema/main/delta/83/04_replica_identities.sql.postgres
new file mode 100644
index 0000000000..8296d56e56
--- /dev/null
+++ b/synapse/storage/schema/main/delta/83/04_replica_identities.sql.postgres
@@ -0,0 +1,88 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Annotate some tables in Postgres with a REPLICA IDENTITY.
+-- Any table that doesn't have a primary key should be annotated explicitly with
+-- a REPLICA IDENTITY so that logical replication can be used.
+-- If this is not done, then UPDATE and DELETE statements on those tables
+-- will fail if logical replication is in use.
+
+
+-- Where possible, re-use unique indices already defined on tables as a replica
+-- identity.
+ALTER TABLE appservice_room_list REPLICA IDENTITY USING INDEX appservice_room_list_idx;
+ALTER TABLE batch_events REPLICA IDENTITY USING INDEX chunk_events_event_id;
+ALTER TABLE blocked_rooms REPLICA IDENTITY USING INDEX blocked_rooms_idx;
+ALTER TABLE cache_invalidation_stream_by_instance REPLICA IDENTITY USING INDEX cache_invalidation_stream_by_instance_id;
+ALTER TABLE device_lists_changes_in_room REPLICA IDENTITY USING INDEX device_lists_changes_in_stream_id;
+ALTER TABLE device_lists_outbound_last_success REPLICA IDENTITY USING INDEX device_lists_outbound_last_success_unique_idx;
+ALTER TABLE device_lists_remote_cache REPLICA IDENTITY USING INDEX device_lists_remote_cache_unique_id;
+ALTER TABLE device_lists_remote_extremeties REPLICA IDENTITY USING INDEX device_lists_remote_extremeties_unique_idx;
+ALTER TABLE device_lists_remote_resync REPLICA IDENTITY USING INDEX device_lists_remote_resync_idx;
+ALTER TABLE e2e_cross_signing_keys REPLICA IDENTITY USING INDEX e2e_cross_signing_keys_stream_idx;
+ALTER TABLE e2e_room_keys REPLICA IDENTITY USING INDEX e2e_room_keys_with_version_idx;
+ALTER TABLE e2e_room_keys_versions REPLICA IDENTITY USING INDEX e2e_room_keys_versions_idx;
+ALTER TABLE erased_users REPLICA IDENTITY USING INDEX erased_users_user;
+ALTER TABLE event_relations REPLICA IDENTITY USING INDEX event_relations_id;
+ALTER TABLE federation_inbound_events_staging REPLICA IDENTITY USING INDEX federation_inbound_events_staging_instance_event;
+ALTER TABLE federation_stream_position REPLICA IDENTITY USING INDEX federation_stream_position_instance;
+ALTER TABLE ignored_users REPLICA IDENTITY USING INDEX ignored_users_uniqueness;
+ALTER TABLE insertion_events REPLICA IDENTITY USING INDEX insertion_events_event_id;
+ALTER TABLE insertion_event_extremities REPLICA IDENTITY USING INDEX insertion_event_extremities_event_id;
+ALTER TABLE monthly_active_users REPLICA IDENTITY USING INDEX monthly_active_users_users;
+ALTER TABLE ratelimit_override REPLICA IDENTITY USING INDEX ratelimit_override_idx;
+ALTER TABLE room_stats_earliest_token REPLICA IDENTITY USING INDEX room_stats_earliest_token_idx;
+ALTER TABLE room_stats_state REPLICA IDENTITY USING INDEX room_stats_state_room;
+ALTER TABLE stream_positions REPLICA IDENTITY USING INDEX stream_positions_idx;
+ALTER TABLE user_directory REPLICA IDENTITY USING INDEX user_directory_user_idx;
+ALTER TABLE user_directory_search REPLICA IDENTITY USING INDEX user_directory_search_user_idx;
+ALTER TABLE user_ips REPLICA IDENTITY USING INDEX user_ips_user_token_ip_unique_index;
+ALTER TABLE user_signature_stream REPLICA IDENTITY USING INDEX user_signature_stream_idx;
+ALTER TABLE users_in_public_rooms REPLICA IDENTITY USING INDEX users_in_public_rooms_u_idx;
+ALTER TABLE users_who_share_private_rooms REPLICA IDENTITY USING INDEX users_who_share_private_rooms_u_idx;
+ALTER TABLE user_threepid_id_server REPLICA IDENTITY USING INDEX user_threepid_id_server_idx;
+ALTER TABLE worker_locks REPLICA IDENTITY USING INDEX worker_locks_key;
+
+
+-- Where there are no unique indices, use the entire rows as replica identities.
+ALTER TABLE current_state_delta_stream REPLICA IDENTITY FULL;
+ALTER TABLE deleted_pushers REPLICA IDENTITY FULL;
+ALTER TABLE device_auth_providers REPLICA IDENTITY FULL;
+ALTER TABLE device_federation_inbox REPLICA IDENTITY FULL;
+ALTER TABLE device_federation_outbox REPLICA IDENTITY FULL;
+ALTER TABLE device_inbox REPLICA IDENTITY FULL;
+ALTER TABLE device_lists_outbound_pokes REPLICA IDENTITY FULL;
+ALTER TABLE device_lists_stream REPLICA IDENTITY FULL;
+ALTER TABLE e2e_cross_signing_signatures REPLICA IDENTITY FULL;
+ALTER TABLE event_auth_chain_links REPLICA IDENTITY FULL;
+ALTER TABLE event_auth REPLICA IDENTITY FULL;
+ALTER TABLE event_push_actions_staging REPLICA IDENTITY FULL;
+ALTER TABLE insertion_event_edges REPLICA IDENTITY FULL;
+ALTER TABLE local_media_repository_url_cache REPLICA IDENTITY FULL;
+ALTER TABLE presence_stream REPLICA IDENTITY FULL;
+ALTER TABLE push_rules_stream REPLICA IDENTITY FULL;
+ALTER TABLE room_alias_servers REPLICA IDENTITY FULL;
+ALTER TABLE stream_ordering_to_exterm REPLICA IDENTITY FULL;
+ALTER TABLE timeline_gaps REPLICA IDENTITY FULL;
+ALTER TABLE user_daily_visits REPLICA IDENTITY FULL;
+ALTER TABLE users_pending_deactivation REPLICA IDENTITY FULL;
+
+-- special cases: unique indices on nullable columns can't be used
+ALTER TABLE event_push_summary REPLICA IDENTITY FULL;
+ALTER TABLE event_search REPLICA IDENTITY FULL;
+ALTER TABLE local_media_repository_thumbnails REPLICA IDENTITY FULL;
+ALTER TABLE remote_media_cache_thumbnails REPLICA IDENTITY FULL;
+ALTER TABLE threepid_guest_access_tokens REPLICA IDENTITY FULL;
+ALTER TABLE user_filters REPLICA IDENTITY FULL; -- sadly the `CHECK` constraint is not enough here
diff --git a/synapse/storage/schema/state/delta/83/05_replica_identities_in_state_db.sql.postgres b/synapse/storage/schema/state/delta/83/05_replica_identities_in_state_db.sql.postgres
new file mode 100644
index 0000000000..9b792a39e2
--- /dev/null
+++ b/synapse/storage/schema/state/delta/83/05_replica_identities_in_state_db.sql.postgres
@@ -0,0 +1,30 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Annotate some tables in Postgres with a REPLICA IDENTITY.
+-- Any table that doesn't have a primary key should be annotated explicitly with
+-- a REPLICA IDENTITY so that logical replication can be used.
+-- If this is not done, then UPDATE and DELETE statements on those tables
+-- will fail if logical replication is in use.
+-- See also: 82/04_replica_identities.sql.postgres on the main database
+
+
+-- Where possible, re-use unique indices already defined on tables as a replica
+-- identity.
+ALTER TABLE state_group_edges REPLICA IDENTITY USING INDEX state_group_edges_unique_idx;
+
+
+-- Where there are no unique indices, use the entire rows as replica identities.
+ALTER TABLE state_groups_state REPLICA IDENTITY FULL;
diff --git a/tests/storage/test_database.py b/tests/storage/test_database.py
index 8cd7c89ca2..92ddaa6f4c 100644
--- a/tests/storage/test_database.py
+++ b/tests/storage/test_database.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Tuple
+from typing import Callable, List, Tuple
 from unittest.mock import Mock, call
 
 from twisted.internet import defer
@@ -29,6 +29,7 @@ from synapse.storage.database import (
 from synapse.util import Clock
 
 from tests import unittest
+from tests.utils import USE_POSTGRES_FOR_TESTS
 
 
 class TupleComparisonClauseTestCase(unittest.TestCase):
@@ -278,3 +279,84 @@ class CancellationTestCase(unittest.HomeserverTestCase):
             ]
         )
         self.assertEqual(exception_callback.call_count, 6)  # no additional calls
+
+
+class PostgresReplicaIdentityTestCase(unittest.HomeserverTestCase):
+    if not USE_POSTGRES_FOR_TESTS:
+        skip = "Requires Postgres"
+
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        self.db_pools = homeserver.get_datastores().databases
+
+    def test_all_tables_have_postgres_replica_identity(self) -> None:
+        """
+        Tests that all tables have a Postgres REPLICA IDENTITY.
+        (See #16224).
+
+        Tables with a PRIMARY KEY have an implied REPLICA IDENTITY and are fine.
+        Other tables need them to be set with `ALTER TABLE`.
+
+        A REPLICA IDENTITY is required for Postgres logical replication to work
+        properly without blocking updates and deletes.
+        """
+
+        sql = """
+            -- Select tables that have no primary key and use the default replica identity rule
+            -- (the default is to use the primary key)
+            WITH tables_no_pkey AS (
+                SELECT tbl.table_schema, tbl.table_name
+                FROM information_schema.tables tbl
+                WHERE table_type = 'BASE TABLE'
+                    AND table_schema not in ('pg_catalog', 'information_schema')
+                    AND NOT EXISTS (
+                        SELECT 1
+                        FROM information_schema.key_column_usage kcu
+                        WHERE kcu.table_name = tbl.table_name
+                            AND kcu.table_schema = tbl.table_schema
+                    )
+            )
+            SELECT pg_class.oid::regclass FROM tables_no_pkey INNER JOIN pg_class ON pg_class.oid::regclass = table_name::regclass
+            WHERE relreplident = 'd'
+
+            UNION
+
+            -- Also select tables that use an index as a replica identity
+            -- but where the index doesn't exist
+            -- (e.g. it could have been deleted)
+            SELECT pg_class.oid::regclass
+                FROM information_schema.tables tbl
+                INNER JOIN pg_class ON pg_class.oid::regclass = table_name::regclass
+                WHERE table_type = 'BASE TABLE'
+                    AND table_schema not in ('pg_catalog', 'information_schema')
+
+                    -- 'i' means an index is used as the replica identity
+                    AND relreplident = 'i'
+
+                    -- look for indices that are marked as the replica identity
+                    AND NOT EXISTS (
+                        SELECT indexrelid::regclass
+                        FROM pg_index
+                        WHERE indrelid = pg_class.oid::regclass AND indisreplident
+                    )
+        """
+
+        def _list_tables_with_missing_replica_identities_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            txn.execute(sql)
+            return [table_name for table_name, in txn]
+
+        for pool in self.db_pools:
+            missing = self.get_success(
+                pool.runInteraction(
+                    "test_list_missing_replica_identities",
+                    _list_tables_with_missing_replica_identities_txn,
+                )
+            )
+            self.assertEqual(
+                len(missing),
+                0,
+                f"The following tables in the {pool.name()!r} database are missing REPLICA IDENTITIES: {missing!r}.",
+            )