summary refs log tree commit diff
diff options
context:
space:
mode:
authorDavid Robertson <davidr@element.io>2023-02-20 13:35:24 +0000
committerGitHub <noreply@github.com>2023-02-20 13:35:24 +0000
commite26d7d5ae786df8d9d9a4dbd0f734e5c2f08aafd (patch)
treeba4673575e0cfa18c78ccbd371c0df19bcc263ff
parentAllow health listener resource to load (#15096) (diff)
downloadsynapse-e26d7d5ae786df8d9d9a4dbd0f734e5c2f08aafd.tar.xz
Teach portdb about `un_partial_stated_event_stream` (#15108)
* Sort BOOLEAN_COLUMNS and APPEND_ONLY_TABLES

So I can see if a given table is present in logarithmic time, rather
than linear.

* Teach portdb about `un_partial_stated_event_streams`

* Comments comments comments

* Changelog
-rw-r--r--changelog.d/15108.bugfix1
-rwxr-xr-xsynapse/_scripts/synapse_port_db.py85
2 files changed, 53 insertions, 33 deletions
diff --git a/changelog.d/15108.bugfix b/changelog.d/15108.bugfix
new file mode 100644
index 0000000000..30af8b439d
--- /dev/null
+++ b/changelog.d/15108.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.75 where the [portdb script](https://matrix-org.github.io/synapse/release-v1.78/postgres.html#porting-from-sqlite) would fail to run after a room had been faster-joined.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 5e137dbbf7..0d35e0af8f 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -94,61 +94,80 @@ reactor = cast(ISynapseReactor, reactor_)
 logger = logging.getLogger("synapse_port_db")
 
 
+# SQLite doesn't have a dedicated boolean type (it stores True/False as 1/0). This means
+# portdb will read sqlite bools as integers, then try to insert them into postgres
+# boolean columns---which fails. Lacking some Python-parseable metaschema, we must
+# specify which integer columns should be inserted as booleans into postgres.
 BOOLEAN_COLUMNS = {
-    "events": ["processed", "outlier", "contains_url"],
-    "rooms": ["is_public", "has_auth_chain_index"],
+    "access_tokens": ["used"],
+    "account_validity": ["email_sent"],
+    "device_lists_changes_in_room": ["converted_to_destinations"],
+    "device_lists_outbound_pokes": ["sent"],
+    "devices": ["hidden"],
+    "e2e_fallback_keys_json": ["used"],
+    "e2e_room_keys": ["is_verified"],
     "event_edges": ["is_state"],
+    "events": ["processed", "outlier", "contains_url"],
+    "local_media_repository": ["safe_from_quarantine"],
     "presence_list": ["accepted"],
     "presence_stream": ["currently_active"],
     "public_room_list_stream": ["visibility"],
-    "devices": ["hidden"],
-    "device_lists_outbound_pokes": ["sent"],
-    "users_who_share_rooms": ["share_private"],
-    "e2e_room_keys": ["is_verified"],
-    "account_validity": ["email_sent"],
+    "pushers": ["enabled"],
     "redactions": ["have_censored"],
     "room_stats_state": ["is_federatable"],
-    "local_media_repository": ["safe_from_quarantine"],
+    "rooms": ["is_public", "has_auth_chain_index"],
     "users": ["shadow_banned", "approved"],
-    "e2e_fallback_keys_json": ["used"],
-    "access_tokens": ["used"],
-    "device_lists_changes_in_room": ["converted_to_destinations"],
-    "pushers": ["enabled"],
+    "un_partial_stated_event_stream": ["rejection_status_changed"],
+    "users_who_share_rooms": ["share_private"],
 }
 
 
+# These tables are never deleted from in normal operation [*], so we can resume porting
+# over rows from a previous attempt rather than starting from scratch.
+#
+# [*]: We do delete from many of these tables when purging a room, and
+#      presumably when purging old events. So we might e.g.
+#
+#      1. Run portdb and port half of some table.
+#      2. Stop portdb.
+#      3. Purge something, deleting some of the rows we've ported over.
+#      4. Restart portdb. The rows deleted from sqlite are still present in postgres.
+#
+#      But this isn't the end of the world: we should be able to repeat the purge
+#      on the postgres DB when porting completes.
 APPEND_ONLY_TABLES = [
+    "cache_invalidation_stream_by_instance",
+    "event_auth",
+    "event_edges",
+    "event_json",
     "event_reference_hashes",
+    "event_search",
+    "event_to_state_groups",
     "events",
-    "event_json",
-    "state_events",
-    "room_memberships",
-    "topics",
-    "room_names",
-    "rooms",
+    "ex_outlier_stream",
     "local_media_repository",
     "local_media_repository_thumbnails",
+    "presence_stream",
+    "public_room_list_stream",
+    "push_rules_stream",
+    "received_transactions",
+    "redactions",
+    "rejections",
     "remote_media_cache",
     "remote_media_cache_thumbnails",
-    "redactions",
-    "event_edges",
-    "event_auth",
-    "received_transactions",
+    "room_memberships",
+    "room_names",
+    "rooms",
     "sent_transactions",
-    "transaction_id_to_pdu",
-    "users",
+    "state_events",
+    "state_group_edges",
     "state_groups",
     "state_groups_state",
-    "event_to_state_groups",
-    "rejections",
-    "event_search",
-    "presence_stream",
-    "push_rules_stream",
-    "ex_outlier_stream",
-    "cache_invalidation_stream_by_instance",
-    "public_room_list_stream",
-    "state_group_edges",
     "stream_ordering_to_exterm",
+    "topics",
+    "transaction_id_to_pdu",
+    "un_partial_stated_event_stream",
+    "users",
 ]