summary refs log tree commit diff
path: root/synapse/util/caches
diff options
context:
space:
mode:
authorRichard van der Hoff <richard@matrix.org>2018-07-12 11:37:44 +0100
committerRichard van der Hoff <richard@matrix.org>2018-07-12 11:37:44 +0100
commitfa5c2bc082185580e7d65a8759a8b5213933a137 (patch)
treed17427df60d28be7fabed6f6ecf0de5dc2a05c5f /synapse/util/caches
parentMerge pull request #3505 from matrix-org/erikj/receipts_cahce (diff)
downloadsynapse-fa5c2bc082185580e7d65a8759a8b5213933a137.tar.xz
Reduce set building in get_entities_changed
This line shows up as about 5% of cpu time on a synchrotron:

    not_known_entities = set(entities) - set(self._entity_to_key)

Presumably the problem here is that _entity_to_key can be largeish, and
building a set for its keys every time this function is called is slow.

Here we rewrite the logic to avoid building so many sets.
Diffstat (limited to 'synapse/util/caches')
-rw-r--r--synapse/util/caches/stream_change_cache.py20
1 files changed, 12 insertions, 8 deletions
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index 8637867c6d..a1f8ff8f10 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -74,14 +74,18 @@ class StreamChangeCache(object):
         assert type(stream_pos) is int
 
         if stream_pos >= self._earliest_known_stream_pos:
-            not_known_entities = set(entities) - set(self._entity_to_key)
-
-            result = (
-                {self._cache[k] for k in self._cache.islice(
-                    start=self._cache.bisect_right(stream_pos))}
-                .intersection(entities)
-                .union(not_known_entities)
-            )
+            changed_entities = {
+                self._cache[k] for k in self._cache.islice(
+                    start=self._cache.bisect_right(stream_pos),
+                )
+            }
+
+            # we need to include entities which we don't know about, as well as
+            # those which are known to have changed since the stream pos.
+            result = {
+                e for e in entities
+                if e in changed_entities or e not in self._entity_to_key
+            }
 
             self.metrics.inc_hits()
         else: