diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 1888480881..2eef7b707d 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -139,14 +139,13 @@ from typing import (
Hashable,
Iterable,
List,
+ Literal,
Optional,
- Set,
Tuple,
)
import attr
from prometheus_client import Counter
-from typing_extensions import Literal
from twisted.internet import defer
@@ -170,7 +169,13 @@ from synapse.metrics.background_process_metrics import (
run_as_background_process,
wrap_as_background_process,
)
-from synapse.types import JsonDict, ReadReceipt, RoomStreamToken, StrCollection
+from synapse.types import (
+ JsonDict,
+ ReadReceipt,
+ RoomStreamToken,
+ StrCollection,
+ get_domain_from_id,
+)
from synapse.util import Clock
from synapse.util.metrics import Measure
from synapse.util.retryutils import filter_destinations_by_retry_limiter
@@ -297,12 +302,10 @@ class _DestinationWakeupQueue:
# being woken up.
_MAX_TIME_IN_QUEUE = 30.0
- # The maximum duration in seconds between waking up consecutive destination
- # queues.
- _MAX_DELAY = 0.1
-
sender: "FederationSender" = attr.ib()
clock: Clock = attr.ib()
+ max_delay_s: int = attr.ib()
+
queue: "OrderedDict[str, Literal[None]]" = attr.ib(factory=OrderedDict)
processing: bool = attr.ib(default=False)
@@ -332,13 +335,15 @@ class _DestinationWakeupQueue:
# We also add an upper bound to the delay, to gracefully handle the
# case where the queue only has a few entries in it.
current_sleep_seconds = min(
- self._MAX_DELAY, self._MAX_TIME_IN_QUEUE / len(self.queue)
+ self.max_delay_s, self._MAX_TIME_IN_QUEUE / len(self.queue)
)
while self.queue:
destination, _ = self.queue.popitem(last=False)
queue = self.sender._get_per_destination_queue(destination)
+ if queue is None:
+ continue
if not queue._new_data_to_send:
# The per destination queue has already been woken up.
@@ -416,19 +421,14 @@ class FederationSender(AbstractFederationSender):
self._is_processing = False
self._last_poked_id = -1
- # map from room_id to a set of PerDestinationQueues which we believe are
- # awaiting a call to flush_read_receipts_for_room. The presence of an entry
- # here for a given room means that we are rate-limiting RR flushes to that room,
- # and that there is a pending call to _flush_rrs_for_room in the system.
- self._queues_awaiting_rr_flush_by_room: Dict[str, Set[PerDestinationQueue]] = {}
+ self._external_cache = hs.get_external_cache()
- self._rr_txn_interval_per_room_ms = (
- 1000.0
- / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
+ rr_txn_interval_per_room_s = (
+ 1.0 / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
+ )
+ self._destination_wakeup_queue = _DestinationWakeupQueue(
+ self, self.clock, max_delay_s=rr_txn_interval_per_room_s
)
-
- self._external_cache = hs.get_external_cache()
- self._destination_wakeup_queue = _DestinationWakeupQueue(self, self.clock)
# Regularly wake up destinations that have outstanding PDUs to be caught up
self.clock.looping_call_now(
@@ -438,12 +438,23 @@ class FederationSender(AbstractFederationSender):
self._wake_destinations_needing_catchup,
)
- def _get_per_destination_queue(self, destination: str) -> PerDestinationQueue:
+ def _get_per_destination_queue(
+ self, destination: str
+ ) -> Optional[PerDestinationQueue]:
"""Get or create a PerDestinationQueue for the given destination
Args:
destination: server_name of remote server
+
+ Returns:
+ None if the destination is not allowed by the federation whitelist.
+ Otherwise a PerDestinationQueue for this destination.
"""
+ if not self.hs.config.federation.is_domain_allowed_according_to_federation_whitelist(
+ destination
+ ):
+ return None
+
queue = self._per_destination_queues.get(destination)
if not queue:
queue = PerDestinationQueue(self.hs, self._transaction_manager, destination)
@@ -720,6 +731,16 @@ class FederationSender(AbstractFederationSender):
# track the fact that we have a PDU for these destinations,
# to allow us to perform catch-up later on if the remote is unreachable
# for a while.
+ # Filter out any destinations not present in the federation_domain_whitelist, if
+ # the whitelist exists. These destinations should not be sent to so let's not
+ # waste time or space keeping track of events destined for them.
+ destinations = [
+ d
+ for d in destinations
+ if self.hs.config.federation.is_domain_allowed_according_to_federation_whitelist(
+ d
+ )
+ ]
await self.store.store_destination_rooms_entries(
destinations,
pdu.room_id,
@@ -734,7 +755,12 @@ class FederationSender(AbstractFederationSender):
)
for destination in destinations:
- self._get_per_destination_queue(destination).send_pdu(pdu)
+ queue = self._get_per_destination_queue(destination)
+ # We expect `queue` to not be None as we already filtered out
+ # non-whitelisted destinations above.
+ assert queue is not None
+
+ queue.send_pdu(pdu)
async def send_read_receipt(self, receipt: ReadReceipt) -> None:
"""Send a RR to any other servers in the room
@@ -745,37 +771,48 @@ class FederationSender(AbstractFederationSender):
# Some background on the rate-limiting going on here.
#
- # It turns out that if we attempt to send out RRs as soon as we get them from
- # a client, then we end up trying to do several hundred Hz of federation
- # transactions. (The number of transactions scales as O(N^2) on the size of a
- # room, since in a large room we have both more RRs coming in, and more servers
- # to send them to.)
+ # It turns out that if we attempt to send out RRs as soon as we get them
+ # from a client, then we end up trying to do several hundred Hz of
+ # federation transactions. (The number of transactions scales as O(N^2)
+ # on the size of a room, since in a large room we have both more RRs
+ # coming in, and more servers to send them to.)
#
- # This leads to a lot of CPU load, and we end up getting behind. The solution
- # currently adopted is as follows:
+ # This leads to a lot of CPU load, and we end up getting behind. The
+ # solution currently adopted is to differentiate between receipts and
+ # destinations we should immediately send to, and those we can trickle
+ # the receipts to.
#
- # The first receipt in a given room is sent out immediately, at time T0. Any
- # further receipts are, in theory, batched up for N seconds, where N is calculated
- # based on the number of servers in the room to achieve a transaction frequency
- # of around 50Hz. So, for example, if there were 100 servers in the room, then
- # N would be 100 / 50Hz = 2 seconds.
+ # The current logic is to send receipts out immediately if:
+ # - the room is "small", i.e. there are only N servers to send receipts
+ # to, and so sending out the receipts immediately doesn't cause too
+ # much load; or
+ # - the receipt is for an event that happened recently, as users
+ # notice if receipts are delayed when they know other users are
+ # currently reading the room; or
+ # - the receipt is being sent to the server that sent the event, so
+ # that users see receipts for their own events quickly.
#
- # Then, after T+N, we flush out any receipts that have accumulated, and restart
- # the timer to flush out more receipts at T+2N, etc. If no receipts accumulate,
- # we stop the cycle and go back to the start.
+ # For destinations that we should delay sending the receipt to, we queue
+ # the receipts up to be sent in the next transaction, but don't trigger
+ # a new transaction to be sent. We then add the destination to the
+ # `DestinationWakeupQueue`, which will slowly iterate over each
+ # destination and trigger a new transaction to be sent.
#
- # However, in practice, it is often possible to flush out receipts earlier: in
- # particular, if we are sending a transaction to a given server anyway (for
- # example, because we have a PDU or a RR in another room to send), then we may
- # as well send out all of the pending RRs for that server. So it may be that
- # by the time we get to T+N, we don't actually have any RRs left to send out.
- # Nevertheless we continue to buffer up RRs for the room in question until we
- # reach the point that no RRs arrive between timer ticks.
+ # However, in practice, it is often possible to send out delayed
+ # receipts earlier: in particular, if we are sending a transaction to a
+ # given server anyway (for example, because we have a PDU or a RR in
+ # another room to send), then we may as well send out all of the pending
+ # RRs for that server. So it may be that by the time we get to waking up
+ # the destination, we don't actually have any RRs left to send out.
#
- # For even more background, see https://github.com/matrix-org/synapse/issues/4730.
+ # For even more background, see
+ # https://github.com/matrix-org/synapse/issues/4730.
room_id = receipt.room_id
+ # Local read receipts always have 1 event ID.
+ event_id = receipt.event_ids[0]
+
# Work out which remote servers should be poked and poke them.
domains_set = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
room_id
@@ -797,49 +834,55 @@ class FederationSender(AbstractFederationSender):
if not domains:
return
- queues_pending_flush = self._queues_awaiting_rr_flush_by_room.get(room_id)
+ # We now split which domains we want to wake up immediately vs which we
+ # want to delay waking up.
+ immediate_domains: StrCollection
+ delay_domains: StrCollection
- # if there is no flush yet scheduled, we will send out these receipts with
- # immediate flushes, and schedule the next flush for this room.
- if queues_pending_flush is not None:
- logger.debug("Queuing receipt for: %r", domains)
+ if len(domains) < 10:
+ # For "small" rooms send to all domains immediately
+ immediate_domains = domains
+ delay_domains = ()
else:
- logger.debug("Sending receipt to: %r", domains)
- self._schedule_rr_flush_for_room(room_id, len(domains))
+ metadata = await self.store.get_metadata_for_event(
+ receipt.room_id, event_id
+ )
+ assert metadata is not None
- for domain in domains:
- queue = self._get_per_destination_queue(domain)
- queue.queue_read_receipt(receipt)
+ sender_domain = get_domain_from_id(metadata.sender)
- # if there is already a RR flush pending for this room, then make sure this
- # destination is registered for the flush
- if queues_pending_flush is not None:
- queues_pending_flush.add(queue)
+ if self.clock.time_msec() - metadata.received_ts < 60_000:
+ # We always send receipts for recent messages immediately
+ immediate_domains = domains
+ delay_domains = ()
else:
- queue.flush_read_receipts_for_room(room_id)
-
- def _schedule_rr_flush_for_room(self, room_id: str, n_domains: int) -> None:
- # that is going to cause approximately len(domains) transactions, so now back
- # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM
- backoff_ms = self._rr_txn_interval_per_room_ms * n_domains
-
- logger.debug("Scheduling RR flush in %s in %d ms", room_id, backoff_ms)
- self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id)
- self._queues_awaiting_rr_flush_by_room[room_id] = set()
-
- def _flush_rrs_for_room(self, room_id: str) -> None:
- queues = self._queues_awaiting_rr_flush_by_room.pop(room_id)
- logger.debug("Flushing RRs in %s to %s", room_id, queues)
-
- if not queues:
- # no more RRs arrived for this room; we are done.
- return
+ # Otherwise, we delay waking up all destinations except for the
+ # sender's domain.
+ immediate_domains = []
+ delay_domains = []
+ for domain in domains:
+ if domain == sender_domain:
+ immediate_domains.append(domain)
+ else:
+ delay_domains.append(domain)
+
+ for domain in immediate_domains:
+ # Add to destination queue and wake the destination up
+ queue = self._get_per_destination_queue(domain)
+ if queue is None:
+ continue
+ queue.queue_read_receipt(receipt)
+ queue.attempt_new_transaction()
- # schedule the next flush
- self._schedule_rr_flush_for_room(room_id, len(queues))
+ for domain in delay_domains:
+ # Add to destination queue...
+ queue = self._get_per_destination_queue(domain)
+ if queue is None:
+ continue
+ queue.queue_read_receipt(receipt)
- for queue in queues:
- queue.flush_read_receipts_for_room(room_id)
+ # ... and schedule the destination to be woken up.
+ self._destination_wakeup_queue.add_to_queue(domain)
async def send_presence_to_destinations(
self, states: Iterable[UserPresenceState], destinations: Iterable[str]
@@ -871,9 +914,10 @@ class FederationSender(AbstractFederationSender):
if self.is_mine_server_name(destination):
continue
- self._get_per_destination_queue(destination).send_presence(
- states, start_loop=False
- )
+ queue = self._get_per_destination_queue(destination)
+ if queue is None:
+ continue
+ queue.send_presence(states, start_loop=False)
self._destination_wakeup_queue.add_to_queue(destination)
@@ -923,6 +967,8 @@ class FederationSender(AbstractFederationSender):
return
queue = self._get_per_destination_queue(edu.destination)
+ if queue is None:
+ return
if key:
queue.send_keyed_edu(edu, key)
else:
@@ -947,9 +993,15 @@ class FederationSender(AbstractFederationSender):
for destination in destinations:
if immediate:
- self._get_per_destination_queue(destination).attempt_new_transaction()
+ queue = self._get_per_destination_queue(destination)
+ if queue is None:
+ continue
+ queue.attempt_new_transaction()
else:
- self._get_per_destination_queue(destination).mark_new_data()
+ queue = self._get_per_destination_queue(destination)
+ if queue is None:
+ continue
+ queue.mark_new_data()
self._destination_wakeup_queue.add_to_queue(destination)
def wake_destination(self, destination: str) -> None:
@@ -968,7 +1020,9 @@ class FederationSender(AbstractFederationSender):
):
return
- self._get_per_destination_queue(destination).attempt_new_transaction()
+ queue = self._get_per_destination_queue(destination)
+ if queue is not None:
+ queue.attempt_new_transaction()
@staticmethod
def get_current_token() -> int:
@@ -1013,6 +1067,9 @@ class FederationSender(AbstractFederationSender):
d
for d in destinations_to_wake
if self._federation_shard_config.should_handle(self._instance_name, d)
+ and self.hs.config.federation.is_domain_allowed_according_to_federation_whitelist(
+ d
+ )
]
for destination in destinations_to_wake:
|