summary refs log tree commit diff
diff options
context:
space:
mode:
authorDavid Robertson <davidr@element.io>2022-01-20 13:38:44 +0000
committerGitHub <noreply@github.com>2022-01-20 13:38:44 +0000
commitf160fe18e3cc5604375f491300d12dd5c7e9b9b2 (patch)
tree4027aaff31d9f01313f2311e8cd65d8c3ffeed7c
parentAllow overriding the complement ref. (#11766) (diff)
downloadsynapse-f160fe18e3cc5604375f491300d12dd5c7e9b9b2.tar.xz
Debug for device lists updates (#11760)
Debug for #8631.

I'm having a hard time tracking down what's going wrong in that issue.
In the reported example, I could see server A sending federation traffic
to server B and all was well. Yet B reports out-of-sync device updates
from A.

I couldn't see what was _in_ the events being sent from A to B. So I
have added some crude logging to track

- when we have updates to send to a remote HS
- the edus we actually accumulate to send
- when a federation transaction includes a device list update edu
- when such an EDU is received

This is a bit of a sledgehammer.
-rw-r--r--changelog.d/11760.misc1
-rw-r--r--synapse/federation/sender/transaction_manager.py12
-rw-r--r--synapse/federation/transport/server/federation.py15
-rw-r--r--synapse/storage/databases/main/devices.py18
4 files changed, 46 insertions, 0 deletions
diff --git a/changelog.d/11760.misc b/changelog.d/11760.misc
new file mode 100644
index 0000000000..6cb1b5dd49
--- /dev/null
+++ b/changelog.d/11760.misc
@@ -0,0 +1 @@
+Add optional debugging to investigate [issue 8631](https://github.com/matrix-org/synapse/issues/8631).
\ No newline at end of file
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index ab935e5a7e..742ee57255 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
     import synapse.server
 
 logger = logging.getLogger(__name__)
+issue_8631_logger = logging.getLogger("synapse.8631_debug")
 
 last_pdu_ts_metric = Gauge(
     "synapse_federation_last_sent_pdu_time",
@@ -124,6 +125,17 @@ class TransactionManager:
                 len(pdus),
                 len(edus),
             )
+            if issue_8631_logger.isEnabledFor(logging.DEBUG):
+                DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"}
+                device_list_updates = [
+                    edu.content for edu in edus if edu.edu_type in DEVICE_UPDATE_EDUS
+                ]
+                if device_list_updates:
+                    issue_8631_logger.debug(
+                        "about to send txn [%s] including device list updates: %s",
+                        transaction.transaction_id,
+                        device_list_updates,
+                    )
 
             # Actually send the transaction
 
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 77bfd88ad0..beadfa422b 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -36,6 +36,7 @@ from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.versionstring import get_version_string
 
 logger = logging.getLogger(__name__)
+issue_8631_logger = logging.getLogger("synapse.8631_debug")
 
 
 class BaseFederationServerServlet(BaseFederationServlet):
@@ -95,6 +96,20 @@ class FederationSendServlet(BaseFederationServerServlet):
                 len(transaction_data.get("edus", [])),
             )
 
+            if issue_8631_logger.isEnabledFor(logging.DEBUG):
+                DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"}
+                device_list_updates = [
+                    edu.content
+                    for edu in transaction_data.get("edus", [])
+                    if edu.edu_type in DEVICE_UPDATE_EDUS
+                ]
+                if device_list_updates:
+                    issue_8631_logger.debug(
+                        "received transaction [%s] including device list updates: %s",
+                        transaction_id,
+                        device_list_updates,
+                    )
+
         except Exception as e:
             logger.exception(e)
             return 400, {"error": "Invalid transaction"}
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 8f0cd0695f..b2a5cd9a65 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -53,6 +53,7 @@ if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
+issue_8631_logger = logging.getLogger("synapse.8631_debug")
 
 DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = (
     "drop_device_list_streams_non_unique_indexes"
@@ -229,6 +230,12 @@ class DeviceWorkerStore(SQLBaseStore):
         if not updates:
             return now_stream_id, []
 
+        if issue_8631_logger.isEnabledFor(logging.DEBUG):
+            data = {(user, device): stream_id for user, device, stream_id, _ in updates}
+            issue_8631_logger.debug(
+                "device updates need to be sent to %s: %s", destination, data
+            )
+
         # get the cross-signing keys of the users in the list, so that we can
         # determine which of the device changes were cross-signing keys
         users = {r[0] for r in updates}
@@ -365,6 +372,17 @@ class DeviceWorkerStore(SQLBaseStore):
             #        and remove the length budgeting above.
             results.append(("org.matrix.signing_key_update", result))
 
+        if issue_8631_logger.isEnabledFor(logging.DEBUG):
+            for (user_id, edu) in results:
+                issue_8631_logger.debug(
+                    "device update to %s for %s from %s to %s: %s",
+                    destination,
+                    user_id,
+                    from_stream_id,
+                    last_processed_stream_id,
+                    edu,
+                )
+
         return last_processed_stream_id, results
 
     def _get_device_updates_by_remote_txn(