From f160fe18e3cc5604375f491300d12dd5c7e9b9b2 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 20 Jan 2022 13:38:44 +0000 Subject: Debug for device lists updates (#11760) Debug for #8631. I'm having a hard time tracking down what's going wrong in that issue. In the reported example, I could see server A sending federation traffic to server B and all was well. Yet B reports out-of-sync device updates from A. I couldn't see what was _in_ the events being sent from A to B. So I have added some crude logging to track - when we have updates to send to a remote HS - the edus we actually accumulate to send - when a federation transaction includes a device list update edu - when such an EDU is received This is a bit of a sledgehammer. --- changelog.d/11760.misc | 1 + synapse/federation/sender/transaction_manager.py | 12 ++++++++++++ synapse/federation/transport/server/federation.py | 15 +++++++++++++++ synapse/storage/databases/main/devices.py | 18 ++++++++++++++++++ 4 files changed, 46 insertions(+) create mode 100644 changelog.d/11760.misc diff --git a/changelog.d/11760.misc b/changelog.d/11760.misc new file mode 100644 index 0000000000..6cb1b5dd49 --- /dev/null +++ b/changelog.d/11760.misc @@ -0,0 +1 @@ +Add optional debugging to investigate [issue 8631](https://github.com/matrix-org/synapse/issues/8631). \ No newline at end of file diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index ab935e5a7e..742ee57255 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -35,6 +35,7 @@ if TYPE_CHECKING: import synapse.server logger = logging.getLogger(__name__) +issue_8631_logger = logging.getLogger("synapse.8631_debug") last_pdu_ts_metric = Gauge( "synapse_federation_last_sent_pdu_time", @@ -124,6 +125,17 @@ class TransactionManager: len(pdus), len(edus), ) + if issue_8631_logger.isEnabledFor(logging.DEBUG): + DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"} + device_list_updates = [ + edu.content for edu in edus if edu.edu_type in DEVICE_UPDATE_EDUS + ] + if device_list_updates: + issue_8631_logger.debug( + "about to send txn [%s] including device list updates: %s", + transaction.transaction_id, + device_list_updates, + ) # Actually send the transaction diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 77bfd88ad0..beadfa422b 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -36,6 +36,7 @@ from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) +issue_8631_logger = logging.getLogger("synapse.8631_debug") class BaseFederationServerServlet(BaseFederationServlet): @@ -95,6 +96,20 @@ class FederationSendServlet(BaseFederationServerServlet): len(transaction_data.get("edus", [])), ) + if issue_8631_logger.isEnabledFor(logging.DEBUG): + DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"} + device_list_updates = [ + edu.content + for edu in transaction_data.get("edus", []) + if edu.edu_type in DEVICE_UPDATE_EDUS + ] + if device_list_updates: + issue_8631_logger.debug( + "received transaction [%s] including device list updates: %s", + transaction_id, + device_list_updates, + ) + except Exception as e: logger.exception(e) return 400, {"error": "Invalid transaction"} diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 8f0cd0695f..b2a5cd9a65 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -53,6 +53,7 @@ if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) +issue_8631_logger = logging.getLogger("synapse.8631_debug") DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( "drop_device_list_streams_non_unique_indexes" @@ -229,6 +230,12 @@ class DeviceWorkerStore(SQLBaseStore): if not updates: return now_stream_id, [] + if issue_8631_logger.isEnabledFor(logging.DEBUG): + data = {(user, device): stream_id for user, device, stream_id, _ in updates} + issue_8631_logger.debug( + "device updates need to be sent to %s: %s", destination, data + ) + # get the cross-signing keys of the users in the list, so that we can # determine which of the device changes were cross-signing keys users = {r[0] for r in updates} @@ -365,6 +372,17 @@ class DeviceWorkerStore(SQLBaseStore): # and remove the length budgeting above. results.append(("org.matrix.signing_key_update", result)) + if issue_8631_logger.isEnabledFor(logging.DEBUG): + for (user_id, edu) in results: + issue_8631_logger.debug( + "device update to %s for %s from %s to %s: %s", + destination, + user_id, + from_stream_id, + last_processed_stream_id, + edu, + ) + return last_processed_stream_id, results def _get_device_updates_by_remote_txn( -- cgit 1.4.1