summary refs log tree commit diff
path: root/synapse/federation
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/federation')
-rw-r--r--synapse/federation/sender/__init__.py20
-rw-r--r--synapse/federation/sender/per_destination_queue.py15
-rw-r--r--synapse/federation/sender/transaction_manager.py4
3 files changed, 38 insertions, 1 deletions
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py

index 5276c1734f..88474492e5 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py
@@ -151,10 +151,25 @@ class FederationSender(object): "process_event_queue_for_federation", self._process_event_queue_loop ) - async def _process_event_queue_loop(self) -> None: + async def _process_event_queue_loop(self): + loop_start_time = self.clock.time_msec() try: self._is_processing = True while True: + # if we've been going around this loop for a long time without + # catching up, deprioritise transaction transmission. This should mean + # that events get batched into fewer transactions, which is more + # efficient, and hence give us a chance to catch up + if ( + self.clock.time_msec() - loop_start_time > 60 * 1000 + and not self._transaction_manager.deprioritise_transmission + ): + logger.warning( + "Event queue is getting behind: deprioritising transaction " + "transmission" + ) + self._transaction_manager.deprioritise_transmission = True + last_token = await self.store.get_federation_out_pos("events") next_token, events = await self.store.get_all_new_events_stream( last_token, self._last_poked_id, limit=100 @@ -264,6 +279,9 @@ class FederationSender(object): finally: self._is_processing = False + if self._transaction_manager.deprioritise_transmission: + logger.info("Event queue caught up: re-prioritising transmission") + self._transaction_manager.deprioritise_transmission = False def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None: # We loop through all destinations to see whether we already have diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index f1534d431d..c624a1cfdf 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py
@@ -15,6 +15,7 @@ # limitations under the License. import datetime import logging +import random from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Tuple from prometheus_client import Counter @@ -39,6 +40,8 @@ if TYPE_CHECKING: # This is defined in the Matrix spec and enforced by the receiver. MAX_EDUS_PER_TRANSACTION = 100 +DEPRIORITISE_SLEEP_TIME = 10 + logger = logging.getLogger(__name__) @@ -220,6 +223,18 @@ class PerDestinationQueue(object): pending_pdus = [] while True: + if self._transaction_manager.deprioritise_transmission: + # if the event-processing loop has got behind, sleep to give it + # a chance to catch up. Add some randomness so that the transmitters + # don't all wake up in sync. + sleeptime = random.uniform( + DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2 + ) + logger.info( + "TX [%s]: sleeping for %f seconds", self._destination, sleeptime + ) + await self._clock.sleep(sleeptime) + # We have to keep 2 free slots for presence and rr_edus limit = MAX_EDUS_PER_TRANSACTION - 2 diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index 0ebc70d57d..031d80593d 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py
@@ -51,6 +51,10 @@ class TransactionManager(object): # HACK to get unique tx id self._next_txn_id = int(self.clock.time_msec()) + # the federation sender sometimes sets this to delay transaction transmission, + # if the sender gets behind. + self.deprioritise_transmission = False + @measure_func("_send_new_transaction") async def send_new_transaction( self, destination: str, pdus: List[EventBase], edus: List[Edu],