Bulk-invalidate e2e cached queries after claiming keys (#16613)

Co-authored-by: Patrick Cloke <patrickc@matrix.org>
author: David Robertson <davidr@element.io> 2023-11-09 15:57:09 +0000
committer: GitHub <noreply@github.com> 2023-11-09 15:57:09 +0000
commit: 91587d4cf903c926a29ed70cecd63549271dc3b9 (patch)
tree: 829d57d7066ec94f15d657f5bf6a117af478a451 /synapse/storage/databases
parent: Bump pyicu from 2.11 to 2.12 (#16603) (diff)
download: synapse-91587d4cf903c926a29ed70cecd63549271dc3b9.tar.xz
2 files changed, 83 insertions, 18 deletions
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 4d0470ffd9..d7232f566b 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -483,6 +483,30 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         txn.call_after(cache_func.invalidate, keys)
         self._send_invalidation_to_replication(txn, cache_func.__name__, keys)
 
+    def _invalidate_cache_and_stream_bulk(
+        self,
+        txn: LoggingTransaction,
+        cache_func: CachedFunction,
+        key_tuples: Collection[Tuple[Any, ...]],
+    ) -> None:
+        """A bulk version of _invalidate_cache_and_stream.
+
+        Locally invalidate every key-tuple in `key_tuples`, then emit invalidations
+        for each key-tuple over replication.
+
+        This implementation is more efficient than a loop which repeatedly calls the
+        non-bulk version.
+        """
+        if not key_tuples:
+            return
+
+        for keys in key_tuples:
+            txn.call_after(cache_func.invalidate, keys)
+
+        self._send_invalidation_to_replication_bulk(
+            txn, cache_func.__name__, key_tuples
+        )
+
     def _invalidate_all_cache_and_stream(
         self, txn: LoggingTransaction, cache_func: CachedFunction
     ) -> None:
@@ -564,10 +588,6 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         if isinstance(self.database_engine, PostgresEngine):
             assert self._cache_id_gen is not None
 
-            # get_next() returns a context manager which is designed to wrap
-            # the transaction. However, we want to only get an ID when we want
-            # to use it, here, so we need to call __enter__ manually, and have
-            # __exit__ called after the transaction finishes.
             stream_id = self._cache_id_gen.get_next_txn(txn)
             txn.call_after(self.hs.get_notifier().on_new_replication_data)
 
@@ -586,6 +606,53 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
                 },
             )
 
+    def _send_invalidation_to_replication_bulk(
+        self,
+        txn: LoggingTransaction,
+        cache_name: str,
+        key_tuples: Collection[Tuple[Any, ...]],
+    ) -> None:
+        """Announce the invalidation of multiple (but not all) cache entries.
+
+        This is more efficient than repeated calls to the non-bulk version. It should
+        NOT be used to invalidating the entire cache: use
+        `_send_invalidation_to_replication` with keys=None.
+
+        Note that this does *not* invalidate the cache locally.
+
+        Args:
+            txn
+            cache_name
+            key_tuples: Key-tuples to invalidate. Assumed to be non-empty.
+        """
+        if isinstance(self.database_engine, PostgresEngine):
+            assert self._cache_id_gen is not None
+
+            stream_ids = self._cache_id_gen.get_next_mult_txn(txn, len(key_tuples))
+            ts = self._clock.time_msec()
+            txn.call_after(self.hs.get_notifier().on_new_replication_data)
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="cache_invalidation_stream_by_instance",
+                keys=(
+                    "stream_id",
+                    "instance_name",
+                    "cache_func",
+                    "keys",
+                    "invalidation_ts",
+                ),
+                values=[
+                    # We convert key_tuples to a list here because psycopg2 serialises
+                    # lists as pq arrrays, but serialises tuples as "composite types".
+                    # (We need an array because the `keys` column has type `[]text`.)
+                    # See:
+                    #     https://www.psycopg.org/docs/usage.html#adapt-list
+                    #     https://www.psycopg.org/docs/usage.html#adapt-tuple
+                    (stream_id, self._instance_name, cache_name, list(key_tuple), ts)
+                    for stream_id, key_tuple in zip(stream_ids, key_tuples)
+                ],
+            )
+
     def get_cache_stream_token_for_writer(self, instance_name: str) -> int:
         if self._cache_id_gen:
             return self._cache_id_gen.get_current_token_for_writer(instance_name)
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 4f96ac25c7..3005e2a2c5 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -1237,13 +1237,11 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         for user_id, device_id, algorithm, key_id, key_json in claimed_keys:
             device_results = results.setdefault(user_id, {}).setdefault(device_id, {})
             device_results[f"{algorithm}:{key_id}"] = json_decoder.decode(key_json)
-
-            if (user_id, device_id) in seen_user_device:
-                continue
             seen_user_device.add((user_id, device_id))
-            self._invalidate_cache_and_stream(
-                txn, self.get_e2e_unused_fallback_key_types, (user_id, device_id)
-            )
+
+        self._invalidate_cache_and_stream_bulk(
+            txn, self.get_e2e_unused_fallback_key_types, seen_user_device
+        )
 
         return results
 
@@ -1376,14 +1374,14 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             List[Tuple[str, str, str, str, str]], txn.execute_values(sql, query_list)
         )
 
-        seen_user_device: Set[Tuple[str, str]] = set()
-        for user_id, device_id, _, _, _ in otk_rows:
-            if (user_id, device_id) in seen_user_device:
-                continue
-            seen_user_device.add((user_id, device_id))
-            self._invalidate_cache_and_stream(
-                txn, self.count_e2e_one_time_keys, (user_id, device_id)
-            )
+        seen_user_device = {
+            (user_id, device_id) for user_id, device_id, _, _, _ in otk_rows
+        }
+        self._invalidate_cache_and_stream_bulk(
+            txn,
+            self.count_e2e_one_time_keys,
+            seen_user_device,
+        )
 
         return otk_rows
author	David Robertson <davidr@element.io>	2023-11-09 15:57:09 +0000
committer	GitHub <noreply@github.com>	2023-11-09 15:57:09 +0000
commit	91587d4cf903c926a29ed70cecd63549271dc3b9 (patch)
tree	829d57d7066ec94f15d657f5bf6a117af478a451 /synapse/storage/databases
parent	Bump pyicu from 2.11 to 2.12 (#16603) (diff)
download	synapse-91587d4cf903c926a29ed70cecd63549271dc3b9.tar.xz