From a86b2f6837f0a067b0a014fbf5140e8773b8da2e Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 11 Oct 2022 11:18:45 -0700 Subject: Fix a bug where redactions were not being sent over federation if we did not have the original event. (#13813) --- synapse/federation/sender/__init__.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'synapse/federation/sender') diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index a6cb3ba58f..774ecd81b6 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -353,21 +353,25 @@ class FederationSender(AbstractFederationSender): last_token = await self.store.get_federation_out_pos("events") ( next_token, - events, event_to_received_ts, - ) = await self.store.get_all_new_events_stream( + ) = await self.store.get_all_new_event_ids_stream( last_token, self._last_poked_id, limit=100 ) + event_ids = event_to_received_ts.keys() + event_entries = await self.store.get_unredacted_events_from_cache_or_db( + event_ids + ) + logger.debug( "Handling %i -> %i: %i events to send (current id %i)", last_token, next_token, - len(events), + len(event_entries), self._last_poked_id, ) - if not events and next_token >= self._last_poked_id: + if not event_entries and next_token >= self._last_poked_id: logger.debug("All events processed") break @@ -508,8 +512,14 @@ class FederationSender(AbstractFederationSender): await handle_event(event) events_by_room: Dict[str, List[EventBase]] = {} - for event in events: - events_by_room.setdefault(event.room_id, []).append(event) + + for event_id in event_ids: + # `event_entries` is unsorted, so we have to iterate over `event_ids` + # to ensure the events are in the right order + event_cache = event_entries.get(event_id) + if event_cache: + event = event_cache.event + events_by_room.setdefault(event.room_id, []).append(event) await make_deferred_yieldable( defer.gatherResults( @@ -524,9 +534,10 @@ class FederationSender(AbstractFederationSender): logger.debug("Successfully handled up to %i", next_token) await self.store.update_federation_out_pos("events", next_token) - if events: + if event_entries: now = self.clock.time_msec() - ts = event_to_received_ts[events[-1].event_id] + last_id = next(reversed(event_ids)) + ts = event_to_received_ts[last_id] assert ts is not None synapse.metrics.event_processing_lag.labels( @@ -536,7 +547,7 @@ class FederationSender(AbstractFederationSender): "federation_sender" ).set(ts) - events_processed_counter.inc(len(events)) + events_processed_counter.inc(len(event_entries)) event_processing_loop_room_count.labels("federation_sender").inc( len(events_by_room) -- cgit 1.5.1 From 09b588854e3a6abc4ea2eaa68bb0345f23be5ce8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 24 Oct 2022 13:05:14 +0100 Subject: Fix `TypeError: 'dict_keys' object is not reversible` (#14280) --- changelog.d/14280.bugfix | 1 + synapse/federation/sender/__init__.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 changelog.d/14280.bugfix (limited to 'synapse/federation/sender') diff --git a/changelog.d/14280.bugfix b/changelog.d/14280.bugfix new file mode 100644 index 0000000000..c546d2be48 --- /dev/null +++ b/changelog.d/14280.bugfix @@ -0,0 +1 @@ +Fix broken outbound federation when using Python 3.7. Broke in v1.70.0rc1. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 774ecd81b6..3ad483efe0 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -536,8 +536,7 @@ class FederationSender(AbstractFederationSender): if event_entries: now = self.clock.time_msec() - last_id = next(reversed(event_ids)) - ts = event_to_received_ts[last_id] + ts = max(t for t in event_to_received_ts.values() if t) assert ts is not None synapse.metrics.event_processing_lag.labels( -- cgit 1.5.1 From 1526ff389f02d14d0df729bd6ea35836e758c449 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 21 Nov 2022 16:46:14 +0100 Subject: Faster joins: filter out non local events when a room doesn't have its full state (#14404) Signed-off-by: Mathieu Velten --- changelog.d/14404.misc | 1 + synapse/federation/sender/per_destination_queue.py | 1 + synapse/handlers/federation.py | 15 +++++++---- synapse/visibility.py | 29 +++++++++++++++++++--- tests/test_visibility.py | 10 ++++---- 5 files changed, 43 insertions(+), 13 deletions(-) create mode 100644 changelog.d/14404.misc (limited to 'synapse/federation/sender') diff --git a/changelog.d/14404.misc b/changelog.d/14404.misc new file mode 100644 index 0000000000..b9ab525f2b --- /dev/null +++ b/changelog.d/14404.misc @@ -0,0 +1 @@ +Faster joins: filter out non local events when a room doesn't have its full state. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 084c45a95c..3ae5e8634c 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -505,6 +505,7 @@ class PerDestinationQueue: new_pdus = await filter_events_for_server( self._storage_controllers, self._destination, + self._server_name, new_pdus, redact=False, ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 188f0956ef..d92582fd5c 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -379,6 +379,7 @@ class FederationHandler: filtered_extremities = await filter_events_for_server( self._storage_controllers, self.server_name, + self.server_name, events_to_check, redact=False, check_history_visibility_only=True, @@ -1231,7 +1232,9 @@ class FederationHandler: async def on_backfill_request( self, origin: str, room_id: str, pdu_list: List[str], limit: int ) -> List[EventBase]: - await self._event_auth_handler.assert_host_in_room(room_id, origin) + # We allow partially joined rooms since in this case we are filtering out + # non-local events in `filter_events_for_server`. + await self._event_auth_handler.assert_host_in_room(room_id, origin, True) # Synapse asks for 100 events per backfill request. Do not allow more. limit = min(limit, 100) @@ -1252,7 +1255,7 @@ class FederationHandler: ) events = await filter_events_for_server( - self._storage_controllers, origin, events + self._storage_controllers, origin, self.server_name, events ) return events @@ -1283,7 +1286,7 @@ class FederationHandler: await self._event_auth_handler.assert_host_in_room(event.room_id, origin) events = await filter_events_for_server( - self._storage_controllers, origin, [event] + self._storage_controllers, origin, self.server_name, [event] ) event = events[0] return event @@ -1296,7 +1299,9 @@ class FederationHandler: latest_events: List[str], limit: int, ) -> List[EventBase]: - await self._event_auth_handler.assert_host_in_room(room_id, origin) + # We allow partially joined rooms since in this case we are filtering out + # non-local events in `filter_events_for_server`. + await self._event_auth_handler.assert_host_in_room(room_id, origin, True) # Only allow up to 20 events to be retrieved per request. limit = min(limit, 20) @@ -1309,7 +1314,7 @@ class FederationHandler: ) missing_events = await filter_events_for_server( - self._storage_controllers, origin, missing_events + self._storage_controllers, origin, self.server_name, missing_events ) return missing_events diff --git a/synapse/visibility.py b/synapse/visibility.py index 40a9c5b53f..b443857571 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -563,7 +563,8 @@ def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: async def filter_events_for_server( storage: StorageControllers, - server_name: str, + target_server_name: str, + local_server_name: str, events: List[EventBase], redact: bool = True, check_history_visibility_only: bool = False, @@ -603,7 +604,7 @@ async def filter_events_for_server( # if the server is either in the room or has been invited # into the room. for ev in memberships.values(): - assert get_domain_from_id(ev.state_key) == server_name + assert get_domain_from_id(ev.state_key) == target_server_name memtype = ev.membership if memtype == Membership.JOIN: @@ -622,6 +623,24 @@ async def filter_events_for_server( # to no users having been erased. erased_senders = {} + # Filter out non-local events when we are in the middle of a partial join, since our servers + # list can be out of date and we could leak events to servers not in the room anymore. + # This can also be true for local events but we consider it to be an acceptable risk. + + # We do this check as a first step and before retrieving membership events because + # otherwise a room could be fully joined after we retrieve those, which would then bypass + # this check but would base the filtering on an outdated view of the membership events. + + partial_state_invisible_events = set() + if not check_history_visibility_only: + for e in events: + sender_domain = get_domain_from_id(e.sender) + if ( + sender_domain != local_server_name + and await storage.main.is_partial_state_room(e.room_id) + ): + partial_state_invisible_events.add(e) + # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't # need to check membership (as we know the server is in the room). @@ -636,7 +655,7 @@ async def filter_events_for_server( if event_to_history_vis[e.event_id] not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE) ], - server_name, + target_server_name, ) to_return = [] @@ -645,6 +664,10 @@ async def filter_events_for_server( visible = check_event_is_visible( event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) ) + + if e in partial_state_invisible_events: + visible = False + if visible and not erased: to_return.append(e) elif redact: diff --git a/tests/test_visibility.py b/tests/test_visibility.py index c385b2f8d4..d0b9ad5454 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -61,7 +61,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "test_server", events_to_filter + self._storage_controllers, "test_server", "hs", events_to_filter ) ) @@ -83,7 +83,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): self.assertEqual( self.get_success( filter_events_for_server( - self._storage_controllers, "remote_hs", [outlier] + self._storage_controllers, "remote_hs", "hs", [outlier] ) ), [outlier], @@ -94,7 +94,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "remote_hs", [outlier, evt] + self._storage_controllers, "remote_hs", "local_hs", [outlier, evt] ) ) self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}") @@ -106,7 +106,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): # be redacted) filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "other_server", [outlier, evt] + self._storage_controllers, "other_server", "local_hs", [outlier, evt] ) ) self.assertEqual(filtered[0], outlier) @@ -141,7 +141,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): # ... and the filtering happens. filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "test_server", events_to_filter + self._storage_controllers, "test_server", "local_hs", events_to_filter ) ) -- cgit 1.5.1 From 39cde585bf1e6cf3d32af9302437b37bae7a64b8 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 24 Nov 2022 18:09:47 +0100 Subject: Faster joins: use initial list of servers if we don't have the full state yet (#14408) Signed-off-by: Mathieu Velten Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/14408.misc | 1 + synapse/federation/sender/__init__.py | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14408.misc (limited to 'synapse/federation/sender') diff --git a/changelog.d/14408.misc b/changelog.d/14408.misc new file mode 100644 index 0000000000..2c77d97591 --- /dev/null +++ b/changelog.d/14408.misc @@ -0,0 +1 @@ +Faster joins: send events to initial list of servers if we don't have the full state yet. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 3ad483efe0..fc1d8c88a7 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -434,7 +434,23 @@ class FederationSender(AbstractFederationSender): # If there are no prev event IDs then the state is empty # and so no remote servers in the room destinations = set() - else: + + if destinations is None: + # During partial join we use the set of servers that we got + # when beginning the join. It's still possible that we send + # events to servers that left the room in the meantime, but + # we consider that an acceptable risk since it is only our own + # events that we leak and not other server's ones. + partial_state_destinations = ( + await self.store.get_partial_state_servers_at_join( + event.room_id + ) + ) + + if len(partial_state_destinations) > 0: + destinations = partial_state_destinations + + if destinations is None: # We check the external cache for the destinations, which is # stored per state group. -- cgit 1.5.1 From d748bbc8f8268d2e8457374d529adafb20b9f5f4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 28 Nov 2022 09:40:17 -0500 Subject: Include thread information when sending receipts over federation. (#14466) Include the thread_id field when sending read receipts over federation. This might result in the same user having multiple read receipts per-room, meaning multiple EDUs must be sent to encapsulate those receipts. This restructures the PerDestinationQueue APIs to support multiple receipt EDUs, queue_read_receipt now becomes linear time in the number of queued threaded receipts in the room for the given user, it is expected this is a small number since receipt EDUs are sent as filler in transactions. --- changelog.d/14466.bugfix | 1 + synapse/federation/sender/per_destination_queue.py | 183 ++++++++++++++------- synapse/handlers/receipts.py | 1 - tests/federation/test_federation_sender.py | 77 +++++++++ 4 files changed, 198 insertions(+), 64 deletions(-) create mode 100644 changelog.d/14466.bugfix (limited to 'synapse/federation/sender') diff --git a/changelog.d/14466.bugfix b/changelog.d/14466.bugfix new file mode 100644 index 0000000000..82f6e6b68e --- /dev/null +++ b/changelog.d/14466.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.70.0 where a receipt's thread ID was not sent over federation. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 3ae5e8634c..5af2784f1e 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -35,7 +35,7 @@ from synapse.logging import issue9533_logger from synapse.logging.opentracing import SynapseTags, set_tag from synapse.metrics import sent_transactions_counter from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import ReadReceipt +from synapse.types import JsonDict, ReadReceipt from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from synapse.visibility import filter_events_for_server @@ -136,8 +136,11 @@ class PerDestinationQueue: # destination self._pending_presence: Dict[str, UserPresenceState] = {} - # room_id -> receipt_type -> user_id -> receipt_dict - self._pending_rrs: Dict[str, Dict[str, Dict[str, dict]]] = {} + # List of room_id -> receipt_type -> user_id -> receipt_dict, + # + # Each receipt can only have a single receipt per + # (room ID, receipt type, user ID, thread ID) tuple. + self._pending_receipt_edus: List[Dict[str, Dict[str, Dict[str, dict]]]] = [] self._rrs_pending_flush = False # stream_id of last successfully sent to-device message. @@ -202,17 +205,53 @@ class PerDestinationQueue: Args: receipt: receipt to be queued """ - self._pending_rrs.setdefault(receipt.room_id, {}).setdefault( - receipt.receipt_type, {} - )[receipt.user_id] = {"event_ids": receipt.event_ids, "data": receipt.data} + serialized_receipt: JsonDict = { + "event_ids": receipt.event_ids, + "data": receipt.data, + } + if receipt.thread_id is not None: + serialized_receipt["data"]["thread_id"] = receipt.thread_id + + # Find which EDU to add this receipt to. There's three situations depending + # on the (room ID, receipt type, user, thread ID) tuple: + # + # 1. If it fully matches, clobber the information. + # 2. If it is missing, add the information. + # 3. If the subset tuple of (room ID, receipt type, user) matches, check + # the next EDU (or add a new EDU). + for edu in self._pending_receipt_edus: + receipt_content = edu.setdefault(receipt.room_id, {}).setdefault( + receipt.receipt_type, {} + ) + # If this room ID, receipt type, user ID is not in this EDU, OR if + # the full tuple matches, use the current EDU. + if ( + receipt.user_id not in receipt_content + or receipt_content[receipt.user_id].get("thread_id") + == receipt.thread_id + ): + receipt_content[receipt.user_id] = serialized_receipt + break + + # If no matching EDU was found, create a new one. + else: + self._pending_receipt_edus.append( + { + receipt.room_id: { + receipt.receipt_type: {receipt.user_id: serialized_receipt} + } + } + ) def flush_read_receipts_for_room(self, room_id: str) -> None: - # if we don't have any read-receipts for this room, it may be that we've already - # sent them out, so we don't need to flush. - if room_id not in self._pending_rrs: - return - self._rrs_pending_flush = True - self.attempt_new_transaction() + # If there are any pending receipts for this room then force-flush them + # in a new transaction. + for edu in self._pending_receipt_edus: + if room_id in edu: + self._rrs_pending_flush = True + self.attempt_new_transaction() + # No use in checking remaining EDUs if the room was found. + break def send_keyed_edu(self, edu: Edu, key: Hashable) -> None: self._pending_edus_keyed[(edu.edu_type, key)] = edu @@ -351,7 +390,7 @@ class PerDestinationQueue: self._pending_edus = [] self._pending_edus_keyed = {} self._pending_presence = {} - self._pending_rrs = {} + self._pending_receipt_edus = [] self._start_catching_up() except FederationDeniedError as e: @@ -543,22 +582,27 @@ class PerDestinationQueue: self._destination, last_successful_stream_ordering ) - def _get_rr_edus(self, force_flush: bool) -> Iterable[Edu]: - if not self._pending_rrs: + def _get_receipt_edus(self, force_flush: bool, limit: int) -> Iterable[Edu]: + if not self._pending_receipt_edus: return if not force_flush and not self._rrs_pending_flush: # not yet time for this lot return - edu = Edu( - origin=self._server_name, - destination=self._destination, - edu_type=EduTypes.RECEIPT, - content=self._pending_rrs, - ) - self._pending_rrs = {} - self._rrs_pending_flush = False - yield edu + # Send at most limit EDUs for receipts. + for content in self._pending_receipt_edus[:limit]: + yield Edu( + origin=self._server_name, + destination=self._destination, + edu_type=EduTypes.RECEIPT, + content=content, + ) + self._pending_receipt_edus = self._pending_receipt_edus[limit:] + + # If there are still pending read-receipts, don't reset the pending flush + # flag. + if not self._pending_receipt_edus: + self._rrs_pending_flush = False def _pop_pending_edus(self, limit: int) -> List[Edu]: pending_edus = self._pending_edus @@ -645,27 +689,61 @@ class _TransactionQueueManager: async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]: # First we calculate the EDUs we want to send, if any. - # We start by fetching device related EDUs, i.e device updates and to - # device messages. We have to keep 2 free slots for presence and rr_edus. - device_edu_limit = MAX_EDUS_PER_TRANSACTION - 2 + # There's a maximum number of EDUs that can be sent with a transaction, + # generally device updates and to-device messages get priority, but we + # want to ensure that there's room for some other EDUs as well. + # + # This is done by: + # + # * Add a presence EDU, if one exists. + # * Add up-to a small limit of read receipt EDUs. + # * Add to-device EDUs, but leave some space for device list updates. + # * Add device list updates EDUs. + # * If there's any remaining room, add other EDUs. + pending_edus = [] + + # Add presence EDU. + if self.queue._pending_presence: + pending_edus.append( + Edu( + origin=self.queue._server_name, + destination=self.queue._destination, + edu_type=EduTypes.PRESENCE, + content={ + "push": [ + format_user_presence_state( + presence, self.queue._clock.time_msec() + ) + for presence in self.queue._pending_presence.values() + ] + }, + ) + ) + self.queue._pending_presence = {} - # We prioritize to-device messages so that existing encryption channels + # Add read receipt EDUs. + pending_edus.extend(self.queue._get_receipt_edus(force_flush=False, limit=5)) + edu_limit = MAX_EDUS_PER_TRANSACTION - len(pending_edus) + + # Next, prioritize to-device messages so that existing encryption channels # work. We also keep a few slots spare (by reducing the limit) so that # we can still trickle out some device list updates. ( to_device_edus, device_stream_id, - ) = await self.queue._get_to_device_message_edus(device_edu_limit - 10) + ) = await self.queue._get_to_device_message_edus(edu_limit - 10) if to_device_edus: self._device_stream_id = device_stream_id else: self.queue._last_device_stream_id = device_stream_id - device_edu_limit -= len(to_device_edus) + pending_edus.extend(to_device_edus) + edu_limit -= len(to_device_edus) + # Add device list update EDUs. device_update_edus, dev_list_id = await self.queue._get_device_update_edus( - device_edu_limit + edu_limit ) if device_update_edus: @@ -673,40 +751,17 @@ class _TransactionQueueManager: else: self.queue._last_device_list_stream_id = dev_list_id - pending_edus = device_update_edus + to_device_edus - - # Now add the read receipt EDU. - pending_edus.extend(self.queue._get_rr_edus(force_flush=False)) - - # And presence EDU. - if self.queue._pending_presence: - pending_edus.append( - Edu( - origin=self.queue._server_name, - destination=self.queue._destination, - edu_type=EduTypes.PRESENCE, - content={ - "push": [ - format_user_presence_state( - presence, self.queue._clock.time_msec() - ) - for presence in self.queue._pending_presence.values() - ] - }, - ) - ) - self.queue._pending_presence = {} + pending_edus.extend(device_update_edus) + edu_limit -= len(device_update_edus) # Finally add any other types of EDUs if there is room. - pending_edus.extend( - self.queue._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus)) - ) - while ( - len(pending_edus) < MAX_EDUS_PER_TRANSACTION - and self.queue._pending_edus_keyed - ): + other_edus = self.queue._pop_pending_edus(edu_limit) + pending_edus.extend(other_edus) + edu_limit -= len(other_edus) + while edu_limit > 0 and self.queue._pending_edus_keyed: _, val = self.queue._pending_edus_keyed.popitem() pending_edus.append(val) + edu_limit -= 1 # Now we look for any PDUs to send, by getting up to 50 PDUs from the # queue @@ -717,8 +772,10 @@ class _TransactionQueueManager: # if we've decided to send a transaction anyway, and we have room, we # may as well send any pending RRs - if len(pending_edus) < MAX_EDUS_PER_TRANSACTION: - pending_edus.extend(self.queue._get_rr_edus(force_flush=True)) + if edu_limit: + pending_edus.extend( + self.queue._get_receipt_edus(force_flush=True, limit=edu_limit) + ) if self._pdus: self._last_stream_ordering = self._pdus[ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index ac01582442..6a4fed1156 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -92,7 +92,6 @@ class ReceiptsHandler: continue # Check if these receipts apply to a thread. - thread_id = None data = user_values.get("data", {}) thread_id = data.get("thread_id") # If the thread ID is invalid, consider it missing. diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index f1e357764f..01f147418b 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -83,6 +83,83 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase): ], ) + @override_config({"send_federation": True}) + def test_send_receipts_thread(self): + mock_send_transaction = ( + self.hs.get_federation_transport_client().send_transaction + ) + mock_send_transaction.return_value = make_awaitable({}) + + # Create receipts for: + # + # * The same room / user on multiple threads. + # * A different user in the same room. + sender = self.hs.get_federation_sender() + for user, thread in ( + ("alice", None), + ("alice", "thread"), + ("bob", None), + ("bob", "diff-thread"), + ): + receipt = ReadReceipt( + "room_id", + "m.read", + user, + ["event_id"], + thread_id=thread, + data={"ts": 1234}, + ) + self.successResultOf( + defer.ensureDeferred(sender.send_read_receipt(receipt)) + ) + + self.pump() + + # expect a call to send_transaction with two EDUs to separate threads. + mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + # Note that the ordering of the EDUs doesn't matter. + self.assertCountEqual( + data["edus"], + [ + { + "edu_type": EduTypes.RECEIPT, + "content": { + "room_id": { + "m.read": { + "alice": { + "event_ids": ["event_id"], + "data": {"ts": 1234, "thread_id": "thread"}, + }, + "bob": { + "event_ids": ["event_id"], + "data": {"ts": 1234, "thread_id": "diff-thread"}, + }, + } + } + }, + }, + { + "edu_type": EduTypes.RECEIPT, + "content": { + "room_id": { + "m.read": { + "alice": { + "event_ids": ["event_id"], + "data": {"ts": 1234}, + }, + "bob": { + "event_ids": ["event_id"], + "data": {"ts": 1234}, + }, + } + } + }, + }, + ], + ) + @override_config({"send_federation": True}) def test_send_receipts_with_backoff(self): """Send two receipts in quick succession; the second should be flushed, but -- cgit 1.5.1 From 4569eda94423a10abb69e0f4d5f37eb723ed764b Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 30 Nov 2022 13:39:47 +0100 Subject: Use servers list approx to send read receipts when in partial state (#14549) Signed-off-by: Mathieu Velten --- changelog.d/14549.misc | 1 + synapse/federation/sender/__init__.py | 2 +- tests/federation/test_federation_sender.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14549.misc (limited to 'synapse/federation/sender') diff --git a/changelog.d/14549.misc b/changelog.d/14549.misc new file mode 100644 index 0000000000..d9d863dd20 --- /dev/null +++ b/changelog.d/14549.misc @@ -0,0 +1 @@ +Faster joins: use servers list approximation to send read receipts when in partial state instead of waiting for the full state of the room. \ No newline at end of file diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index fc1d8c88a7..30ebd62883 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -647,7 +647,7 @@ class FederationSender(AbstractFederationSender): room_id = receipt.room_id # Work out which remote servers should be poked and poke them. - domains_set = await self._storage_controllers.state.get_current_hosts_in_room( + domains_set = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation( room_id ) domains = [ diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index 01f147418b..cbc99d30b9 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -38,6 +38,10 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase): return_value=make_awaitable({"test", "host2"}) ) + hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = ( + hs.get_storage_controllers().state.get_current_hosts_in_room + ) + return hs @override_config({"send_federation": True}) -- cgit 1.5.1 From cb59e080627745d089d073d9dac276362d9abaf6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 6 Dec 2022 09:52:55 +0000 Subject: Improve logging and opentracing for to-device message handling (#14598) A batch of changes intended to make it easier to trace to-device messages through the system. The intention here is that a client can set a property org.matrix.msgid in any to-device message it sends. That ID is then included in any tracing or logging related to the message. (Suggestions as to where this field should be documented welcome. I'm not enthusiastic about speccing it - it's very much an optional extra to help with debugging.) I've also generally improved the data we send to opentracing for these messages. --- changelog.d/14598.feature | 1 + synapse/api/constants.py | 3 + synapse/federation/sender/per_destination_queue.py | 2 +- synapse/handlers/appservice.py | 3 - synapse/handlers/devicemessage.py | 36 +++++---- synapse/handlers/sync.py | 26 ++++-- synapse/logging/opentracing.py | 11 ++- synapse/rest/client/sendtodevice.py | 1 - synapse/storage/databases/main/deviceinbox.py | 92 ++++++++++++++++++---- tests/handlers/test_appservice.py | 7 +- 10 files changed, 136 insertions(+), 46 deletions(-) create mode 100644 changelog.d/14598.feature (limited to 'synapse/federation/sender') diff --git a/changelog.d/14598.feature b/changelog.d/14598.feature new file mode 100644 index 0000000000..88d561e286 --- /dev/null +++ b/changelog.d/14598.feature @@ -0,0 +1 @@ +Improve opentracing and logging for to-device message handling. \ No newline at end of file diff --git a/synapse/api/constants.py b/synapse/api/constants.py index bc04a0755b..89723d24fa 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -230,6 +230,9 @@ class EventContentFields: # The authorising user for joining a restricted room. AUTHORISING_USER: Final = "join_authorised_via_users_server" + # an unspecced field added to to-device messages to identify them uniquely-ish + TO_DEVICE_MSGID: Final = "org.matrix.msgid" + class RoomTypes: """Understood values of the room_type field of m.room.create events.""" diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 5af2784f1e..ffc9d95ee7 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -641,7 +641,7 @@ class PerDestinationQueue: if not message_id: continue - set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id) + set_tag(SynapseTags.TO_DEVICE_EDU_ID, message_id) edus = [ Edu( diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index f68027aaed..5d1d21cdc8 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -578,9 +578,6 @@ class ApplicationServicesHandler: device_id, ), messages in recipient_device_to_messages.items(): for message_json in messages: - # Remove 'message_id' from the to-device message, as it's an internal ID - message_json.pop("message_id", None) - message_payload.append( { "to_user_id": user_id, diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index 444c08bc2e..75e89850f5 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -15,7 +15,7 @@ import logging from typing import TYPE_CHECKING, Any, Dict -from synapse.api.constants import EduTypes, ToDeviceEventTypes +from synapse.api.constants import EduTypes, EventContentFields, ToDeviceEventTypes from synapse.api.errors import SynapseError from synapse.api.ratelimiting import Ratelimiter from synapse.logging.context import run_in_background @@ -216,14 +216,24 @@ class DeviceMessageHandler: """ sender_user_id = requester.user.to_string() - message_id = random_string(16) - set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id) - - log_kv({"number_of_to_device_messages": len(messages)}) - set_tag("sender", sender_user_id) + set_tag(SynapseTags.TO_DEVICE_TYPE, message_type) + set_tag(SynapseTags.TO_DEVICE_SENDER, sender_user_id) local_messages = {} remote_messages: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} for user_id, by_device in messages.items(): + # add an opentracing log entry for each message + for device_id, message_content in by_device.items(): + log_kv( + { + "event": "send_to_device_message", + "user_id": user_id, + "device_id": device_id, + EventContentFields.TO_DEVICE_MSGID: message_content.get( + EventContentFields.TO_DEVICE_MSGID + ), + } + ) + # Ratelimit local cross-user key requests by the sending device. if ( message_type == ToDeviceEventTypes.RoomKeyRequest @@ -233,6 +243,7 @@ class DeviceMessageHandler: requester, (sender_user_id, requester.device_id) ) if not allowed: + log_kv({"message": f"dropping key requests to {user_id}"}) logger.info( "Dropping room_key_request from %s to %s due to rate limit", sender_user_id, @@ -247,18 +258,11 @@ class DeviceMessageHandler: "content": message_content, "type": message_type, "sender": sender_user_id, - "message_id": message_id, } for device_id, message_content in by_device.items() } if messages_by_device: local_messages[user_id] = messages_by_device - log_kv( - { - "user_id": user_id, - "device_id": list(messages_by_device), - } - ) else: destination = get_domain_from_id(user_id) remote_messages.setdefault(destination, {})[user_id] = by_device @@ -267,7 +271,11 @@ class DeviceMessageHandler: remote_edu_contents = {} for destination, messages in remote_messages.items(): - log_kv({"destination": destination}) + # The EDU contains a "message_id" property which is used for + # idempotence. Make up a random one. + message_id = random_string(16) + log_kv({"destination": destination, "message_id": message_id}) + remote_edu_contents[destination] = { "messages": messages, "sender": sender_user_id, diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0b395a104d..dace9b606f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -31,14 +31,20 @@ from typing import ( import attr from prometheus_client import Counter -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.filtering import FilterCollection from synapse.api.presence import UserPresenceState from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import EventBase from synapse.handlers.relations import BundledAggregations from synapse.logging.context import current_context -from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span +from synapse.logging.opentracing import ( + SynapseTags, + log_kv, + set_tag, + start_active_span, + trace, +) from synapse.push.clientformat import format_push_rules_for_user from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary @@ -1586,6 +1592,7 @@ class SyncHandler: else: return DeviceListUpdates() + @trace async def _generate_sync_entry_for_to_device( self, sync_result_builder: "SyncResultBuilder" ) -> None: @@ -1605,11 +1612,16 @@ class SyncHandler: ) for message in messages: - # We pop here as we shouldn't be sending the message ID down - # `/sync` - message_id = message.pop("message_id", None) - if message_id: - set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id) + log_kv( + { + "event": "to_device_message", + "sender": message["sender"], + "type": message["type"], + EventContentFields.TO_DEVICE_MSGID: message["content"].get( + EventContentFields.TO_DEVICE_MSGID + ), + } + ) logger.debug( "Returning %d to-device messages between %d and %d (current token: %d)", diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index b69060854f..a705af8356 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -292,8 +292,15 @@ logger = logging.getLogger(__name__) class SynapseTags: - # The message ID of any to_device message processed - TO_DEVICE_MESSAGE_ID = "to_device.message_id" + # The message ID of any to_device EDU processed + TO_DEVICE_EDU_ID = "to_device.edu_id" + + # Details about to-device messages + TO_DEVICE_TYPE = "to_device.type" + TO_DEVICE_SENDER = "to_device.sender" + TO_DEVICE_RECIPIENT = "to_device.recipient" + TO_DEVICE_RECIPIENT_DEVICE = "to_device.recipient_device" + TO_DEVICE_MSGID = "to_device.msgid" # client-generated ID # Whether the sync response has new data to be returned to the client. SYNC_RESULT = "sync.new_data" diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py index 46a8b03829..55d52f0b28 100644 --- a/synapse/rest/client/sendtodevice.py +++ b/synapse/rest/client/sendtodevice.py @@ -46,7 +46,6 @@ class SendToDeviceRestServlet(servlet.RestServlet): def on_PUT( self, request: SynapseRequest, message_type: str, txn_id: str ) -> Awaitable[Tuple[int, JsonDict]]: - set_tag("message_type", message_type) set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( request, self._put, request, message_type, txn_id diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 73c95ffb6f..48a54d9cb8 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -26,8 +26,15 @@ from typing import ( cast, ) +from synapse.api.constants import EventContentFields from synapse.logging import issue9533_logger -from synapse.logging.opentracing import log_kv, set_tag, trace +from synapse.logging.opentracing import ( + SynapseTags, + log_kv, + set_tag, + start_active_span, + trace, +) from synapse.replication.tcp.streams import ToDeviceStream from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import ( @@ -397,6 +404,17 @@ class DeviceInboxWorkerStore(SQLBaseStore): (recipient_user_id, recipient_device_id), [] ).append(message_dict) + # start a new span for each message, so that we can tag each separately + with start_active_span("get_to_device_message"): + set_tag(SynapseTags.TO_DEVICE_TYPE, message_dict["type"]) + set_tag(SynapseTags.TO_DEVICE_SENDER, message_dict["sender"]) + set_tag(SynapseTags.TO_DEVICE_RECIPIENT, recipient_user_id) + set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, recipient_device_id) + set_tag( + SynapseTags.TO_DEVICE_MSGID, + message_dict["content"].get(EventContentFields.TO_DEVICE_MSGID), + ) + if limit is not None and rowcount == limit: # We ended up bumping up against the message limit. There may be more messages # to retrieve. Return what we have, as well as the last stream position that @@ -678,12 +696,35 @@ class DeviceInboxWorkerStore(SQLBaseStore): ], ) - if remote_messages_by_destination: - issue9533_logger.debug( - "Queued outgoing to-device messages with stream_id %i for %s", - stream_id, - list(remote_messages_by_destination.keys()), - ) + for destination, edu in remote_messages_by_destination.items(): + if issue9533_logger.isEnabledFor(logging.DEBUG): + issue9533_logger.debug( + "Queued outgoing to-device messages with " + "stream_id %i, EDU message_id %s, type %s for %s: %s", + stream_id, + edu["message_id"], + edu["type"], + destination, + [ + f"{user_id}/{device_id} (msgid " + f"{msg.get(EventContentFields.TO_DEVICE_MSGID)})" + for (user_id, messages_by_device) in edu["messages"].items() + for (device_id, msg) in messages_by_device.items() + ], + ) + + for (user_id, messages_by_device) in edu["messages"].items(): + for (device_id, msg) in messages_by_device.items(): + with start_active_span("store_outgoing_to_device_message"): + set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["sender"]) + set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["message_id"]) + set_tag(SynapseTags.TO_DEVICE_TYPE, edu["type"]) + set_tag(SynapseTags.TO_DEVICE_RECIPIENT, user_id) + set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, device_id) + set_tag( + SynapseTags.TO_DEVICE_MSGID, + msg.get(EventContentFields.TO_DEVICE_MSGID), + ) async with self._device_inbox_id_gen.get_next() as stream_id: now_ms = self._clock.time_msec() @@ -801,7 +842,19 @@ class DeviceInboxWorkerStore(SQLBaseStore): # Only insert into the local inbox if the device exists on # this server device_id = row["device_id"] - message_json = json_encoder.encode(messages_by_device[device_id]) + + with start_active_span("serialise_to_device_message"): + msg = messages_by_device[device_id] + set_tag(SynapseTags.TO_DEVICE_TYPE, msg["type"]) + set_tag(SynapseTags.TO_DEVICE_SENDER, msg["sender"]) + set_tag(SynapseTags.TO_DEVICE_RECIPIENT, user_id) + set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, device_id) + set_tag( + SynapseTags.TO_DEVICE_MSGID, + msg["content"].get(EventContentFields.TO_DEVICE_MSGID), + ) + message_json = json_encoder.encode(msg) + messages_json_for_user[device_id] = message_json if messages_json_for_user: @@ -821,15 +874,20 @@ class DeviceInboxWorkerStore(SQLBaseStore): ], ) - issue9533_logger.debug( - "Stored to-device messages with stream_id %i for %s", - stream_id, - [ - (user_id, device_id) - for (user_id, messages_by_device) in local_by_user_then_device.items() - for device_id in messages_by_device.keys() - ], - ) + if issue9533_logger.isEnabledFor(logging.DEBUG): + issue9533_logger.debug( + "Stored to-device messages with stream_id %i: %s", + stream_id, + [ + f"{user_id}/{device_id} (msgid " + f"{msg['content'].get(EventContentFields.TO_DEVICE_MSGID)})" + for ( + user_id, + messages_by_device, + ) in messages_by_user_then_device.items() + for (device_id, msg) in messages_by_device.items() + ], + ) class DeviceInboxBackgroundUpdateStore(SQLBaseStore): diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index 9ed26d87a7..57bfbd7734 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -765,7 +765,12 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): fake_device_ids = [f"device_{num}" for num in range(number_of_messages - 1)] messages = { self.exclusive_as_user: { - device_id: to_device_message_content for device_id in fake_device_ids + device_id: { + "type": "test_to_device_message", + "sender": "@some:sender", + "content": to_device_message_content, + } + for device_id in fake_device_ids } } -- cgit 1.5.1