summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2020-11-11 15:07:34 +0000
committer GitHub <noreply@github.com> 2020-11-11 15:07:34 +0000
commit 5829872bec9b9986c741eafec36e47774e4d2b3e (patch)
tree d440589d411ddd55141613dc2f6e921101acfebb
parent Check support room has only two users before sending a notice (#8728) (diff)
download synapse-5829872bec9b9986c741eafec36e47774e4d2b3e.tar.xz
Fix port script to handle foreign key constraints (#8730)
-rw-r--r-- .buildkite/test_db.db bin 19279872 -> 19296256 bytes
-rw-r--r-- changelog.d/8730.bugfix 1
-rwxr-xr-x scripts/synapse_port_db 68
3 files changed, 63 insertions, 6 deletions
diff --git a/.buildkite/test_db.db b/.buildkite/test_db.db
index 361369a581..a0d9f16a75 100644
--- a/.buildkite/test_db.db
+++ b/.buildkite/test_db.db
Binary files differ
diff --git a/changelog.d/8730.bugfix b/changelog.d/8730.bugfix
new file mode 100644
index 0000000000..dcc42bc981
--- /dev/null
+++ b/changelog.d/8730.bugfix
@@ -0,0 +1 @@
+Fix port script to correctly handle foreign key constraints. Broke in v1.21.0.
diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db
index 13c0120bb4..7a638ea8e3 100755
--- a/scripts/synapse_port_db
+++ b/scripts/synapse_port_db
@@ -22,7 +22,7 @@ import logging
 import sys
 import time
 import traceback
-from typing import Optional
+from typing import Dict, Optional, Set
 
 import yaml
 
@@ -292,6 +292,34 @@ class Porter(object):
 
         return table, already_ported, total_to_port, forward_chunk, backward_chunk
 
+    async def get_table_constraints(self) -> Dict[str, Set[str]]:
+        """Returns a map of tables that have foreign key constraints to tables they depend on.
+        """
+
+        def _get_constraints(txn):
+            # We can pull the information about foreign key constraints out from
+            # the postgres schema tables.
+            sql = """
+                SELECT DISTINCT
+                    tc.table_name,
+                    ccu.table_name AS foreign_table_name
+                FROM
+                    information_schema.table_constraints AS tc
+                    INNER JOIN information_schema.constraint_column_usage AS ccu
+                    USING (table_schema, constraint_name)
+                WHERE tc.constraint_type = 'FOREIGN KEY';
+            """
+            txn.execute(sql)
+
+            results = {}
+            for table, foreign_table in txn:
+                results.setdefault(table, set()).add(foreign_table)
+            return results
+
+        return await self.postgres_store.db_pool.runInteraction(
+            "get_table_constraints", _get_constraints
+        )
+
     async def handle_table(
         self, table, postgres_size, table_size, forward_chunk, backward_chunk
     ):
@@ -619,15 +647,43 @@ class Porter(object):
                     consumeErrors=True,
                 )
             )
+            # Map from table name to args passed to `handle_table`, i.e. a tuple
+            # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`.
+            tables_to_port_info_map = {r[0]: r[1:] for r in setup_res}
 
             # Step 4. Do the copying.
+            #
+            # This is slightly convoluted as we need to ensure tables are ported
+            # in the correct order due to foreign key constraints.
             self.progress.set_state("Copying to postgres")
-            await make_deferred_yieldable(
-                defer.gatherResults(
-                    [run_in_background(self.handle_table, *res) for res in setup_res],
-                    consumeErrors=True,
+
+            constraints = await self.get_table_constraints()
+            tables_ported = set()  # type: Set[str]
+
+            while tables_to_port_info_map:
+                # Pulls out all tables that are still to be ported and which
+                # only depend on tables that are already ported (if any).
+                tables_to_port = [
+                    table
+                    for table in tables_to_port_info_map
+                    if not constraints.get(table, set()) - tables_ported
+                ]
+
+                await make_deferred_yieldable(
+                    defer.gatherResults(
+                        [
+                            run_in_background(
+                                self.handle_table,
+                                table,
+                                *tables_to_port_info_map.pop(table),
+                            )
+                            for table in tables_to_port
+                        ],
+                        consumeErrors=True,
+                    )
                 )
-            )
+
+                tables_ported.update(tables_to_port)
 
             # Step 5. Set up sequences
             self.progress.set_state("Setting up sequence generators")