summary refs log tree commit diff
diff options
context:
space:
mode:
authorOlivier Wilkinson (reivilibre) <oliverw@matrix.org>2023-11-16 12:22:29 +0000
committerOlivier Wilkinson (reivilibre) <oliverw@matrix.org>2023-11-16 12:22:29 +0000
commit78b114a04afca637b3a5cafbc2302ed9ca6e720e (patch)
tree73103e93e9a3ce7f776ef1ee97668a9a14909364
parentAdd an Admin API to temporarily grant the ability to update an existing cross... (diff)
downloadsynapse-78b114a04afca637b3a5cafbc2302ed9ca6e720e.tar.xz
Add a PRIMARY KEY lint
-rw-r--r--tests/storage/test_database.py135
1 files changed, 134 insertions, 1 deletions
diff --git a/tests/storage/test_database.py b/tests/storage/test_database.py
index d60176b1d4..97e21fca15 100644
--- a/tests/storage/test_database.py
+++ b/tests/storage/test_database.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, List, Tuple
+from typing import Callable, List, Set, Tuple
 from unittest.mock import Mock, call
 
 from twisted.internet import defer
@@ -361,3 +361,136 @@ class PostgresReplicaIdentityTestCase(unittest.HomeserverTestCase):
                 0,
                 f"The following tables in the {pool.name()!r} database are missing REPLICA IDENTITIES: {missing!r}.",
             )
+
+
+class PostgresPrimaryKeyTestCase(unittest.HomeserverTestCase):
+    if not USE_POSTGRES_FOR_TESTS:
+        skip = "Requires Postgres"
+
+    # Some tables are exempt, mostly for historical reasons
+    EXEMPT_TABLES = {
+        "appservice_room_list",
+        "batch_events",
+        "blocked_rooms",
+        "cache_invalidation_stream_by_instance",
+        "current_state_delta_stream",
+        "deleted_pushers",
+        "device_auth_providers",
+        "device_federation_inbox",
+        "device_federation_outbox",
+        "device_inbox",
+        "device_lists_changes_in_room",
+        "device_lists_outbound_last_success",
+        "device_lists_outbound_pokes",
+        "device_lists_remote_cache",
+        "device_lists_remote_extremeties",
+        "device_lists_remote_resync",
+        "device_lists_stream",
+        "e2e_cross_signing_keys",
+        "e2e_cross_signing_signatures",
+        "e2e_room_keys",
+        "e2e_room_keys_versions",
+        "erased_users",
+        "event_auth",
+        "event_auth_chain_links",
+        "event_push_actions_staging",
+        "event_push_summary",
+        "event_relations",
+        "event_search",
+        "federation_inbound_events_staging",
+        "federation_stream_position",
+        "ignored_users",
+        "insertion_event_edges",
+        "insertion_event_extremities",
+        "insertion_events",
+        "local_media_repository_thumbnails",
+        "local_media_repository_url_cache",
+        "monthly_active_users",
+        "presence_stream",
+        "push_rules_stream",
+        "ratelimit_override",
+        "remote_media_cache_thumbnails",
+        "room_alias_servers",
+        "room_stats_earliest_token",
+        "room_stats_state",
+        "state_group_edges",
+        "state_groups_state",
+        "stream_ordering_to_exterm",
+        "stream_positions",
+        "threepid_guest_access_tokens",
+        "timeline_gaps",
+        "user_daily_visits",
+        "user_directory",
+        "user_directory_search",
+        "user_filters",
+        "user_ips",
+        "user_signature_stream",
+        "user_threepid_id_server",
+        "users_in_public_rooms",
+        "users_pending_deactivation",
+        "users_who_share_private_rooms",
+        "worker_locks",
+    }
+
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        self.db_pools = homeserver.get_datastores().databases
+
+    def test_all_nonexempt_tables_have_primary_key(self) -> None:
+        """
+        Tests that all tables have a PRIMARY KEY.
+
+        Whilst there are occasionally potentially good reasons to not have one,
+        we should default to having PRIMARY KEYs on new tables.
+
+        Historically we have not created PRIMARY KEYs when they should be created
+        (sometimes even creating a UNIQUE index on the same columns instead).
+
+        It is preferable to use PRIMARY KEYs instead of UNIQUE indices where possible
+        because this:
+
+        - provides a default REPLICA IDENTITY, ensuring logical replication works
+          without extra complication;
+        - is sometimes more helpful for understanding; and
+        - is more useful to external tools which expect PRIMARY KEYs (e.g. some database
+          migration tools on some cloud providers do not work with tables without PRIMARY
+          KEYs)
+        """
+
+        sql = """
+            SELECT tbl.table_name
+            FROM information_schema.tables tbl
+            WHERE table_type = 'BASE TABLE'
+                AND table_schema not in ('pg_catalog', 'information_schema')
+                AND NOT EXISTS (
+                    SELECT 1
+                    FROM information_schema.key_column_usage kcu
+                    WHERE kcu.table_name = tbl.table_name
+                        AND kcu.table_schema = tbl.table_schema
+                )
+        """
+
+        def _list_tables_with_missing_primary_keys_txn(
+            txn: LoggingTransaction,
+        ) -> Set[str]:
+            txn.execute(sql)
+            return {table_name for table_name, in txn}
+
+        for pool in self.db_pools:
+            missing = sorted(
+                self.get_success(
+                    pool.runInteraction(
+                        "test_list_missing_primary_keys",
+                        _list_tables_with_missing_primary_keys_txn,
+                    )
+                )
+                - self.EXEMPT_TABLES
+            )
+            self.assertEqual(
+                len(missing),
+                0,
+                f"The following tables in the {pool.name()!r} database are missing PRIMARY KEYs: {missing!r}.\n"
+                f"Please consider adding a primary key if it makes sense to do so — this gives the table a replica identity for free and helps humans and tools to understand the table better.\n"
+                f"If this lack of a primary key is intentional, please add an exemption.",
+            )