summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2019-02-18 17:53:31 +0000
committerErik Johnston <erik@matrix.org>2019-02-18 17:53:31 +0000
commita9b5ea6fc1e26ff791118b67af01fdad8e9c68c8 (patch)
tree3fc801c2c0ba1106363da1344dffc52728335f2d
parentMerge pull request #4632 from matrix-org/erikj/basic_sentry (diff)
downloadsynapse-a9b5ea6fc1e26ff791118b67af01fdad8e9c68c8.tar.xz
Batch cache invalidation over replication
Currently whenever the current state changes in a room invalidate a lot
of caches, which cause *a lot* of traffic over replication. Instead,
lets batch up all those invalidations and send a single poke down
the replication streams.

Hopefully this will reduce load on the master process by substantially
reducing traffic.
-rw-r--r--synapse/replication/slave/storage/_base.py19
-rw-r--r--synapse/storage/_base.py57
-rw-r--r--synapse/storage/events.py25
3 files changed, 69 insertions, 32 deletions
diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py
index 2d81d49e9a..1353a32d00 100644
--- a/synapse/replication/slave/storage/_base.py
+++ b/synapse/replication/slave/storage/_base.py
@@ -17,7 +17,7 @@ import logging
 
 import six
 
-from synapse.storage._base import SQLBaseStore
+from synapse.storage._base import _CURRENT_STATE_CACHE_NAME, SQLBaseStore
 from synapse.storage.engines import PostgresEngine
 
 from ._slaved_id_tracker import SlavedIdTracker
@@ -54,12 +54,17 @@ class BaseSlavedStore(SQLBaseStore):
         if stream_name == "caches":
             self._cache_id_gen.advance(token)
             for row in rows:
-                try:
-                    getattr(self, row.cache_func).invalidate(tuple(row.keys))
-                except AttributeError:
-                    # We probably haven't pulled in the cache in this worker,
-                    # which is fine.
-                    pass
+                if row.cache_func == _CURRENT_STATE_CACHE_NAME:
+                    room_id = row.keys[0]
+                    members_changed = set(row.keys[1:])
+                    self._invalidate_state_caches(room_id, members_changed)
+                else:
+                    try:
+                        getattr(self, row.cache_func).invalidate(tuple(row.keys))
+                    except AttributeError:
+                        # We probably haven't pulled in the cache in this worker,
+                        # which is fine.
+                        pass
 
     def _invalidate_cache_and_stream(self, txn, cache_func, keys):
         txn.call_after(cache_func.invalidate, keys)
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index e124161845..f7c6d714ab 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -28,6 +28,7 @@ from twisted.internet import defer
 from synapse.api.errors import StoreError
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.types import get_domain_from_id
 from synapse.util.caches.descriptors import Cache
 from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
 from synapse.util.stringutils import exception_to_unicode
@@ -64,6 +65,10 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = {
     "event_search": "event_search_event_id_idx",
 }
 
+# This is a special cache name we use to batch multiple invalidations of caches
+# based on the current state when notifying workers over replication.
+_CURRENT_STATE_CACHE_NAME = "cs_cache_fake"
+
 
 class LoggingTransaction(object):
     """An object that almost-transparently proxies for the 'txn' object
@@ -1184,6 +1189,56 @@ class SQLBaseStore(object):
         be invalidated.
         """
         txn.call_after(cache_func.invalidate, keys)
+        self._send_invalidation_to_replication(txn, cache_func.__name__, keys)
+
+    def _invalidate_state_caches_and_stream(self, txn, room_id, members_changed):
+        """Special case invalidation of caches based on current state.
+
+        We special case this so that we can batch the cache invalidations into a
+        single replication poke.
+
+        Args:
+            txn
+            room_id (str): Room were state changed
+            members_changed (set[str]): The user_ids of members that have changed
+        """
+        txn.call_after(self._invalidate_state_caches, room_id, members_changed)
+
+        keys = [room_id]
+        keys.extend(members_changed)
+        self._send_invalidation_to_replication(
+            txn, _CURRENT_STATE_CACHE_NAME, keys,
+        )
+
+    def _invalidate_state_caches(self, room_id, members_changed):
+        """Invalidates caches that are based on the current state, but does
+        not stream invalidations down replication.
+
+        Args:
+            room_id (str): Room were state changed
+            members_changed (set[str]): The user_ids of members that have changed
+        """
+        for member in members_changed:
+            self.get_rooms_for_user_with_stream_ordering.invalidate((member,))
+
+        for host in set(get_domain_from_id(u) for u in members_changed):
+            self.is_host_joined.invalidate((room_id, host))
+            self.was_host_joined.invalidate((room_id, host))
+
+        self.get_users_in_room.invalidate((room_id,))
+        self.get_room_summary.invalidate((room_id,))
+        self.get_current_state_ids.invalidate((room_id,))
+
+    def _send_invalidation_to_replication(self, txn, cache_name, keys):
+        """Notifies replication that given cache has been invalidated.
+
+        Note that this does *not* invalidate the cache locally.
+
+        Args:
+            txn
+            cache_name (str)
+            keys (list[str])
+        """
 
         if isinstance(self.database_engine, PostgresEngine):
             # get_next() returns a context manager which is designed to wrap
@@ -1201,7 +1256,7 @@ class SQLBaseStore(object):
                 table="cache_invalidation_stream",
                 values={
                     "stream_id": stream_id,
-                    "cache_func": cache_func.__name__,
+                    "cache_func": cache_name,
                     "keys": list(keys),
                     "invalidation_ts": self.clock.time_msec(),
                 }
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 81b250480d..06db9e56e6 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -979,30 +979,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
                 if ev_type == EventTypes.Member
             )
 
-            for member in members_changed:
-                self._invalidate_cache_and_stream(
-                    txn, self.get_rooms_for_user_with_stream_ordering, (member,)
-                )
-
-            for host in set(get_domain_from_id(u) for u in members_changed):
-                self._invalidate_cache_and_stream(
-                    txn, self.is_host_joined, (room_id, host)
-                )
-                self._invalidate_cache_and_stream(
-                    txn, self.was_host_joined, (room_id, host)
-                )
-
-            self._invalidate_cache_and_stream(
-                txn, self.get_users_in_room, (room_id,)
-            )
-
-            self._invalidate_cache_and_stream(
-                txn, self.get_room_summary, (room_id,)
-            )
-
-            self._invalidate_cache_and_stream(
-                txn, self.get_current_state_ids, (room_id,)
-            )
+            self._invalidate_state_caches_and_stream(txn, room_id, members_changed)
 
     def _update_forward_extremities_txn(self, txn, new_forward_extremities,
                                         max_stream_order):