summary refs log tree commit diff
path: root/synapse/util/caches
diff options
context:
space:
mode:
authorRichard van der Hoff <richard@matrix.org>2018-07-12 11:37:44 +0100
committerRichard van der Hoff <richard@matrix.org>2018-07-12 11:37:44 +0100
commitfa5c2bc082185580e7d65a8759a8b5213933a137 (patch)
treed17427df60d28be7fabed6f6ecf0de5dc2a05c5f /synapse/util/caches
parentMerge pull request #3505 from matrix-org/erikj/receipts_cahce (diff)
downloadsynapse-fa5c2bc082185580e7d65a8759a8b5213933a137.tar.xz
Reduce set building in get_entities_changed
This line shows up as about 5% of cpu time on a synchrotron:

    not_known_entities = set(entities) - set(self._entity_to_key)

Presumably the problem here is that _entity_to_key can be largeish, and
building a set for its keys every time this function is called is slow.

Here we rewrite the logic to avoid building so many sets.
Diffstat (limited to 'synapse/util/caches')
-rw-r--r--synapse/util/caches/stream_change_cache.py20
1 files changed, 12 insertions, 8 deletions
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py

index 8637867c6d..a1f8ff8f10 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py
@@ -74,14 +74,18 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - not_known_entities = set(entities) - set(self._entity_to_key) - - result = ( - {self._cache[k] for k in self._cache.islice( - start=self._cache.bisect_right(stream_pos))} - .intersection(entities) - .union(not_known_entities) - ) + changed_entities = { + self._cache[k] for k in self._cache.islice( + start=self._cache.bisect_right(stream_pos), + ) + } + + # we need to include entities which we don't know about, as well as + # those which are known to have changed since the stream pos. + result = { + e for e in entities + if e in changed_entities or e not in self._entity_to_key + } self.metrics.inc_hits() else: