summary refs log tree commit diff
path: root/synapse/federation
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/federation')
-rw-r--r--synapse/federation/federation_client.py118
-rw-r--r--synapse/federation/sender/__init__.py145
-rw-r--r--synapse/federation/sender/per_destination_queue.py15
3 files changed, 118 insertions, 160 deletions
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py

index f93335edaa..a5b6a61195 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py
@@ -451,6 +451,28 @@ class FederationClient(FederationBase): return signed_auth + def _is_unknown_endpoint( + self, e: HttpResponseException, synapse_error: Optional[SynapseError] = None + ) -> bool: + """ + Returns true if the response was due to an endpoint being unimplemented. + + Args: + e: The error response received from the remote server. + synapse_error: The above error converted to a SynapseError. This is + automatically generated if not provided. + + """ + if synapse_error is None: + synapse_error = e.to_synapse_error() + # There is no good way to detect an "unknown" endpoint. + # + # Dendrite returns a 404 (with no body); synapse returns a 400 + # with M_UNRECOGNISED. + return e.code == 404 or ( + e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED + ) + async def _try_destination_list( self, description: str, @@ -468,9 +490,9 @@ class FederationClient(FederationBase): callback: Function to run for each server. Passed a single argument: the server_name to try. - If the callback raises a CodeMessageException with a 300/400 code, - attempts to perform the operation stop immediately and the exception is - reraised. + If the callback raises a CodeMessageException with a 300/400 code or + an UnsupportedRoomVersionError, attempts to perform the operation + stop immediately and the exception is reraised. Otherwise, if the callback raises an Exception the error is logged and the next server tried. Normally the stacktrace is logged but this is @@ -492,8 +514,7 @@ class FederationClient(FederationBase): continue try: - res = await callback(destination) - return res + return await callback(destination) except InvalidResponseError as e: logger.warning("Failed to %s via %s: %s", description, destination, e) except UnsupportedRoomVersionError: @@ -502,17 +523,15 @@ class FederationClient(FederationBase): synapse_error = e.to_synapse_error() failover = False + # Failover on an internal server error, or if the destination + # doesn't implemented the endpoint for some reason. if 500 <= e.code < 600: failover = True - elif failover_on_unknown_endpoint: - # there is no good way to detect an "unknown" endpoint. Dendrite - # returns a 404 (with no body); synapse returns a 400 - # with M_UNRECOGNISED. - if e.code == 404 or ( - e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED - ): - failover = True + elif failover_on_unknown_endpoint and self._is_unknown_endpoint( + e, synapse_error + ): + failover = True if not failover: raise synapse_error from e @@ -570,9 +589,8 @@ class FederationClient(FederationBase): UnsupportedRoomVersionError: if remote responds with a room version we don't understand. - SynapseError: if the chosen remote server returns a 300/400 code. - - RuntimeError: if no servers were reachable. + SynapseError: if the chosen remote server returns a 300/400 code, or + no servers successfully handle the request. """ valid_memberships = {Membership.JOIN, Membership.LEAVE} if membership not in valid_memberships: @@ -642,9 +660,8 @@ class FederationClient(FederationBase): ``auth_chain``. Raises: - SynapseError: if the chosen remote server returns a 300/400 code. - - RuntimeError: if no servers were reachable. + SynapseError: if the chosen remote server returns a 300/400 code, or + no servers successfully handle the request. """ async def send_request(destination) -> Dict[str, Any]: @@ -673,7 +690,7 @@ class FederationClient(FederationBase): if create_event is None: # If the state doesn't have a create event then the room is # invalid, and it would fail auth checks anyway. - raise SynapseError(400, "No create event in state") + raise InvalidResponseError("No create event in state") # the room version should be sane. create_room_version = create_event.content.get( @@ -746,16 +763,11 @@ class FederationClient(FederationBase): content=pdu.get_pdu_json(time_now), ) except HttpResponseException as e: - if e.code in [400, 404]: - err = e.to_synapse_error() - - # If we receive an error response that isn't a generic error, or an - # unrecognised endpoint error, we assume that the remote understands - # the v2 invite API and this is a legitimate error. - if err.errcode not in [Codes.UNKNOWN, Codes.UNRECOGNIZED]: - raise err - else: - raise e.to_synapse_error() + # If an error is received that is due to an unrecognised endpoint, + # fallback to the v1 endpoint. Otherwise consider it a legitmate error + # and raise. + if not self._is_unknown_endpoint(e): + raise logger.debug("Couldn't send_join with the v2 API, falling back to the v1 API") @@ -802,6 +814,11 @@ class FederationClient(FederationBase): Returns: The event as a dict as returned by the remote server + + Raises: + SynapseError: if the remote server returns an error or if the server + only supports the v1 endpoint and a room version other than "1" + or "2" is requested. """ time_now = self._clock.time_msec() @@ -817,28 +834,19 @@ class FederationClient(FederationBase): }, ) except HttpResponseException as e: - if e.code in [400, 404]: - err = e.to_synapse_error() - - # If we receive an error response that isn't a generic error, we - # assume that the remote understands the v2 invite API and this - # is a legitimate error. - if err.errcode != Codes.UNKNOWN: - raise err - - # Otherwise, we assume that the remote server doesn't understand - # the v2 invite API. That's ok provided the room uses old-style event - # IDs. + # If an error is received that is due to an unrecognised endpoint, + # fallback to the v1 endpoint if the room uses old-style event IDs. + # Otherwise consider it a legitmate error and raise. + err = e.to_synapse_error() + if self._is_unknown_endpoint(e, err): if room_version.event_format != EventFormatVersions.V1: raise SynapseError( 400, "User's homeserver does not support this room version", Codes.UNSUPPORTED_ROOM_VERSION, ) - elif e.code in (403, 429): - raise e.to_synapse_error() else: - raise + raise err # Didn't work, try v1 API. # Note the v1 API returns a tuple of `(200, content)` @@ -865,9 +873,8 @@ class FederationClient(FederationBase): pdu: event to be sent Raises: - SynapseError if the chosen remote server returns a 300/400 code. - - RuntimeError if no servers were reachable. + SynapseError: if the chosen remote server returns a 300/400 code, or + no servers successfully handle the request. """ async def send_request(destination: str) -> None: @@ -889,16 +896,11 @@ class FederationClient(FederationBase): content=pdu.get_pdu_json(time_now), ) except HttpResponseException as e: - if e.code in [400, 404]: - err = e.to_synapse_error() - - # If we receive an error response that isn't a generic error, or an - # unrecognised endpoint error, we assume that the remote understands - # the v2 invite API and this is a legitimate error. - if err.errcode not in [Codes.UNKNOWN, Codes.UNRECOGNIZED]: - raise err - else: - raise e.to_synapse_error() + # If an error is received that is due to an unrecognised endpoint, + # fallback to the v1 endpoint. Otherwise consider it a legitmate error + # and raise. + if not self._is_unknown_endpoint(e): + raise logger.debug("Couldn't send_leave with the v2 API, falling back to the v1 API") diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 022bbf7dad..deb40f4610 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py
@@ -14,26 +14,19 @@ import abc import logging -from typing import ( - TYPE_CHECKING, - Collection, - Dict, - Hashable, - Iterable, - List, - Optional, - Set, - Tuple, -) +from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Set, Tuple from prometheus_client import Counter +from twisted.internet import defer + import synapse.metrics from synapse.api.presence import UserPresenceState from synapse.events import EventBase from synapse.federation.sender.per_destination_queue import PerDestinationQueue from synapse.federation.sender.transaction_manager import TransactionManager from synapse.federation.units import Edu +from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics import ( LaterGauge, event_processing_loop_counter, @@ -262,27 +255,15 @@ class FederationSender(AbstractFederationSender): if not events and next_token >= self._last_poked_id: break - async def get_destinations_for_event( - event: EventBase, - ) -> Collection[str]: - """Computes the destinations to which this event must be sent. - - This returns an empty tuple when there are no destinations to send to, - or if this event is not from this homeserver and it is not sending - it on behalf of another server. - - Will also filter out destinations which this sender is not responsible for, - if multiple federation senders exist. - """ - + async def handle_event(event: EventBase) -> None: # Only send events for this server. send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() is_mine = self.is_mine_id(event.sender) if not is_mine and send_on_behalf_of is None: - return () + return if not event.internal_metadata.should_proactively_send(): - return () + return destinations = None # type: Optional[Set[str]] if not event.prev_event_ids(): @@ -317,7 +298,7 @@ class FederationSender(AbstractFederationSender): "Failed to calculate hosts in room for event: %s", event.event_id, ) - return () + return destinations = { d @@ -327,15 +308,17 @@ class FederationSender(AbstractFederationSender): ) } - destinations.discard(self.server_name) - if send_on_behalf_of is not None: # If we are sending the event on behalf of another server # then it already has the event and there is no reason to # send the event to it. destinations.discard(send_on_behalf_of) + logger.debug("Sending %s to %r", event, destinations) + if destinations: + await self._send_pdu(event, destinations) + now = self.clock.time_msec() ts = await self.store.get_received_ts(event.event_id) @@ -343,29 +326,24 @@ class FederationSender(AbstractFederationSender): "federation_sender" ).observe((now - ts) / 1000) - return destinations - return () - - async def get_federatable_events_and_destinations( - events: Iterable[EventBase], - ) -> List[Tuple[EventBase, Collection[str]]]: - with Measure(self.clock, "get_destinations_for_events"): - # Fetch federation destinations per event, - # skip if get_destinations_for_event returns an empty collection, - # return list of event->destinations pairs. - return [ - (event, dests) - for (event, dests) in [ - (event, await get_destinations_for_event(event)) - for event in events - ] - if dests - ] - - events_and_dests = await get_federatable_events_and_destinations(events) - - # Send corresponding events to each destination queue - await self._distribute_events(events_and_dests) + async def handle_room_events(events: Iterable[EventBase]) -> None: + with Measure(self.clock, "handle_room_events"): + for event in events: + await handle_event(event) + + events_by_room = {} # type: Dict[str, List[EventBase]] + for event in events: + events_by_room.setdefault(event.room_id, []).append(event) + + await make_deferred_yieldable( + defer.gatherResults( + [ + run_in_background(handle_room_events, evs) + for evs in events_by_room.values() + ], + consumeErrors=True, + ) + ) await self.store.update_federation_out_pos("events", next_token) @@ -383,7 +361,7 @@ class FederationSender(AbstractFederationSender): events_processed_counter.inc(len(events)) event_processing_loop_room_count.labels("federation_sender").inc( - len({event.room_id for event in events}) + len(events_by_room) ) event_processing_loop_counter.labels("federation_sender").inc() @@ -395,53 +373,34 @@ class FederationSender(AbstractFederationSender): finally: self._is_processing = False - async def _distribute_events( - self, - events_and_dests: Iterable[Tuple[EventBase, Collection[str]]], - ) -> None: - """Distribute events to the respective per_destination queues. - - Also persists last-seen per-room stream_ordering to 'destination_rooms'. - - Args: - events_and_dests: A list of tuples, which are (event: EventBase, destinations: Collection[str]). - Every event is paired with its intended destinations (in federation). - """ - # Tuples of room_id + destination to their max-seen stream_ordering - room_with_dest_stream_ordering = {} # type: Dict[Tuple[str, str], int] - - # List of events to send to each destination - events_by_dest = {} # type: Dict[str, List[EventBase]] + async def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None: + # We loop through all destinations to see whether we already have + # a transaction in progress. If we do, stick it in the pending_pdus + # table and we'll get back to it later. - # For each event-destinations pair... - for event, destinations in events_and_dests: + destinations = set(destinations) + destinations.discard(self.server_name) + logger.debug("Sending to: %s", str(destinations)) - # (we got this from the database, it's filled) - assert event.internal_metadata.stream_ordering - - sent_pdus_destination_dist_total.inc(len(destinations)) - sent_pdus_destination_dist_count.inc() + if not destinations: + return - # ...iterate over those destinations.. - for destination in destinations: - # ...update their stream-ordering... - room_with_dest_stream_ordering[(event.room_id, destination)] = max( - event.internal_metadata.stream_ordering, - room_with_dest_stream_ordering.get((event.room_id, destination), 0), - ) + sent_pdus_destination_dist_total.inc(len(destinations)) + sent_pdus_destination_dist_count.inc() - # ...and add the event to each destination queue. - events_by_dest.setdefault(destination, []).append(event) + assert pdu.internal_metadata.stream_ordering - # Bulk-store destination_rooms stream_ids - await self.store.bulk_store_destination_rooms_entries( - room_with_dest_stream_ordering + # track the fact that we have a PDU for these destinations, + # to allow us to perform catch-up later on if the remote is unreachable + # for a while. + await self.store.store_destination_rooms_entries( + destinations, + pdu.room_id, + pdu.internal_metadata.stream_ordering, ) - for destination, pdus in events_by_dest.items(): - logger.debug("Sending %d pdus to %s", len(pdus), destination) - - self._get_per_destination_queue(destination).send_pdus(pdus) + for destination in destinations: + self._get_per_destination_queue(destination).send_pdu(pdu) async def send_read_receipt(self, receipt: ReadReceipt) -> None: """Send a RR to any other servers in the room diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 3bb66bce32..3b053ebcfb 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py
@@ -154,22 +154,19 @@ class PerDestinationQueue: + len(self._pending_edus_keyed) ) - def send_pdus(self, pdus: Iterable[EventBase]) -> None: - """Add PDUs to the queue, and start the transmission loop if necessary + def send_pdu(self, pdu: EventBase) -> None: + """Add a PDU to the queue, and start the transmission loop if necessary Args: - pdus: pdus to send + pdu: pdu to send """ if not self._catching_up or self._last_successful_stream_ordering is None: # only enqueue the PDU if we are not catching up (False) or do not # yet know if we have anything to catch up (None) - self._pending_pdus.extend(pdus) + self._pending_pdus.append(pdu) else: - self._catchup_last_skipped = max( - pdu.internal_metadata.stream_ordering - for pdu in pdus - if pdu.internal_metadata.stream_ordering is not None - ) + assert pdu.internal_metadata.stream_ordering + self._catchup_last_skipped = pdu.internal_metadata.stream_ordering self.attempt_new_transaction()