diff options
author | Richard van der Hoff <richard@matrix.org> | 2018-07-12 11:37:44 +0100 |
---|---|---|
committer | Richard van der Hoff <richard@matrix.org> | 2018-07-12 11:37:44 +0100 |
commit | fa5c2bc082185580e7d65a8759a8b5213933a137 (patch) | |
tree | d17427df60d28be7fabed6f6ecf0de5dc2a05c5f /synapse/util/caches/stream_change_cache.py | |
parent | Merge pull request #3505 from matrix-org/erikj/receipts_cahce (diff) | |
download | synapse-fa5c2bc082185580e7d65a8759a8b5213933a137.tar.xz |
Reduce set building in get_entities_changed
This line shows up as about 5% of CPU time on a synchrotron: `not_known_entities = set(entities) - set(self._entity_to_key)`. Presumably the problem here is that `_entity_to_key` can be largeish, and building a set of its keys every time this function is called is slow. Here we rewrite the logic to avoid building so many sets.
Diffstat (limited to 'synapse/util/caches/stream_change_cache.py')
-rw-r--r-- | synapse/util/caches/stream_change_cache.py | 20 |
1 files changed, 12 insertions, 8 deletions
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 8637867c6d..a1f8ff8f10 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -74,14 +74,18 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - not_known_entities = set(entities) - set(self._entity_to_key) - - result = ( - {self._cache[k] for k in self._cache.islice( - start=self._cache.bisect_right(stream_pos))} - .intersection(entities) - .union(not_known_entities) - ) + changed_entities = { + self._cache[k] for k in self._cache.islice( + start=self._cache.bisect_right(stream_pos), + ) + } + + # we need to include entities which we don't know about, as well as + # those which are known to have changed since the stream pos. + result = { + e for e in entities + if e in changed_entities or e not in self._entity_to_key + } self.metrics.inc_hits() else: |