From 00cb3eb24b277bb37bd1b7d8449c08a37cb4b014 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Jan 2016 16:37:41 +0000 Subject: Cache tags and account data --- synapse/util/caches/stream_change_cache.py | 95 ++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 synapse/util/caches/stream_change_cache.py (limited to 'synapse/util/caches/stream_change_cache.py') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py new file mode 100644 index 0000000000..33b37f7f29 --- /dev/null +++ b/synapse/util/caches/stream_change_cache.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util.caches import cache_counter, caches_by_name + + +from blist import sorteddict +import logging + + +logger = logging.getLogger(__name__) + + +class StreamChangeCache(object): + """Keeps track of the stream positions of the latest change in a set of entities. + + Typically the entity will be a room or user id. + + Given a list of entities and a stream position, it will give a subset of + entities that may have changed since that position. If position key is too + old then the cache will simply return all given entities. + """ + def __init__(self, name, current_stream_pos, max_size=10000): + self._max_size = max_size + self._entity_to_key = {} + self._cache = sorteddict() + self._earliest_known_stream_pos = current_stream_pos + self.name = name + caches_by_name[self.name] = self._cache + + def get_entity_has_changed(self, entity, stream_pos): + assert type(stream_pos) is int + + if stream_pos <= self._earliest_known_stream_pos: + return True + + latest_entity_change_pos = self._entity_to_key.get(entity, None) + if latest_entity_change_pos is None: + return True + + if stream_pos < latest_entity_change_pos: + return True + + return False + + def get_entities_changed(self, entities, stream_pos): + """Returns subset of entities that have had new things since the + given position. If the position is too old it will just return the given list. + """ + assert type(stream_pos) is int + + if stream_pos > self._earliest_known_stream_pos: + keys = self._cache.keys() + i = keys.bisect_right(stream_pos) + + result = set( + self._cache[k] for k in keys[i:] + ).intersection(entities) + + cache_counter.inc_hits(self.name) + else: + result = entities + cache_counter.inc_misses(self.name) + + return result + + def entity_has_changed(self, entitiy, stream_pos): + """Informs the cache that the entitiy has been changed at the given + position. + """ + assert type(stream_pos) is int + + if stream_pos > self._earliest_known_stream_pos: + old_pos = self._entity_to_key.get(entitiy, None) + if old_pos: + stream_pos = max(stream_pos, old_pos) + self._cache.pop(old_pos, None) + self._cache[stream_pos] = entitiy + + while len(self._cache) > self._max_size: + k, r = self._cache.popitem() + self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos) + self._entity_to_key.pop(r, None) -- cgit 1.5.1 From 45cf827c8fe7163a51f1d0d7c9e2531da9b58c8d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Jan 2016 16:39:18 +0000 Subject: Change name and doc has_entity_changed --- synapse/storage/account_data.py | 2 +- synapse/storage/stream.py | 2 +- synapse/storage/tags.py | 2 +- synapse/util/caches/stream_change_cache.py | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'synapse/util/caches/stream_change_cache.py') diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 95294c3f6c..62e49e1c0e 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -128,7 +128,7 @@ class AccountDataStore(SQLBaseStore): return (global_account_data, account_data_by_room) - changed = self._account_data_stream_cache.get_entity_has_changed( + changed = self._account_data_stream_cache.has_entity_changed( user_id, int(stream_id) ) if not changed: diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index c7d7893328..6e81d46c60 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -200,7 +200,7 @@ class StreamStore(SQLBaseStore): defer.returnValue(([], from_key)) if from_id: - has_changed = yield self._events_stream_cache.get_entity_has_changed( + has_changed = yield self._events_stream_cache.has_entity_changed( room_id, from_id ) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 50af899192..75ce04092d 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -88,7 +88,7 @@ class TagsStore(SQLBaseStore): room_ids = [row[0] for row in txn.fetchall()] return room_ids - changed = self._tags_stream_cache.get_entity_has_changed(user_id, int(stream_id)) + changed = self._tags_stream_cache.has_entity_changed(user_id, int(stream_id)) if not changed: defer.returnValue({}) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 33b37f7f29..3ca0e57780 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -40,7 +40,9 @@ class StreamChangeCache(object): self.name = name caches_by_name[self.name] = self._cache - def get_entity_has_changed(self, entity, stream_pos): + def has_entity_changed(self, entity, stream_pos): + """Returns True if the entity may have been updated since stream_pos + """ assert type(stream_pos) is int if stream_pos <= self._earliest_known_stream_pos: -- cgit 1.5.1 From 0663c5bd52d52e095258e312fb62c2a9cb3f200a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Jan 2016 17:27:28 +0000 Subject: Include cache hits with has_entity_changed --- synapse/util/caches/stream_change_cache.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/util/caches/stream_change_cache.py') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 3ca0e57780..b48f5cb273 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -46,15 +46,19 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos <= self._earliest_known_stream_pos: + cache_counter.inc_misses(self.name) return True latest_entity_change_pos = self._entity_to_key.get(entity, None) if latest_entity_change_pos is None: + cache_counter.inc_misses(self.name) return True if stream_pos < latest_entity_change_pos: + cache_counter.inc_misses(self.name) return True + cache_counter.inc_hits(self.name) return False def get_entities_changed(self, entities, stream_pos): -- cgit 1.5.1 From 82cf3a8043a6ffe1f57d78bf88eba06ad0c53fbe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Jan 2016 17:44:04 +0000 Subject: Fix inequalities --- synapse/util/caches/stream_change_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/util/caches/stream_change_cache.py') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index b48f5cb273..483e0cdf96 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -45,7 +45,7 @@ class StreamChangeCache(object): """ assert type(stream_pos) is int - if stream_pos <= self._earliest_known_stream_pos: + if stream_pos < self._earliest_known_stream_pos: cache_counter.inc_misses(self.name) return True @@ -67,7 +67,7 @@ class StreamChangeCache(object): """ assert type(stream_pos) is int - if stream_pos > self._earliest_known_stream_pos: + if stream_pos >= self._earliest_known_stream_pos: keys = self._cache.keys() i = keys.bisect_right(stream_pos) -- cgit 1.5.1 From 40431251cba7ef1b623559db972600ece40818a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Jan 2016 18:05:43 +0000 Subject: Correctly update _entity_to_key --- synapse/util/caches/stream_change_cache.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'synapse/util/caches/stream_change_cache.py') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 483e0cdf96..22a9f8f467 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -82,18 +82,19 @@ class StreamChangeCache(object): return result - def entity_has_changed(self, entitiy, stream_pos): - """Informs the cache that the entitiy has been changed at the given + def entity_has_changed(self, entity, stream_pos): + """Informs the cache that the entity has been changed at the given position. """ assert type(stream_pos) is int if stream_pos > self._earliest_known_stream_pos: - old_pos = self._entity_to_key.get(entitiy, None) + old_pos = self._entity_to_key.get(entity, None) if old_pos: stream_pos = max(stream_pos, old_pos) self._cache.pop(old_pos, None) - self._cache[stream_pos] = entitiy + self._cache[stream_pos] = entity + self._entity_to_key[entity] = stream_pos while len(self._cache) > self._max_size: k, r = self._cache.popitem() -- cgit 1.5.1 From 3f5dd18bd44ae426d3b1ff062dd64acbad72f8ae Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Jan 2016 18:11:41 +0000 Subject: If the same as the earliest key, assume nothing has changed. --- synapse/util/caches/stream_change_cache.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/util/caches/stream_change_cache.py') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 22a9f8f467..c673b1bdfc 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -49,6 +49,11 @@ class StreamChangeCache(object): cache_counter.inc_misses(self.name) return True + if stream_pos == self._earliest_known_stream_pos: + # If the same as the earliest key, assume nothing has changed. + cache_counter.inc_hits(self.name) + return False + latest_entity_change_pos = self._entity_to_key.get(entity, None) if latest_entity_change_pos is None: cache_counter.inc_misses(self.name) -- cgit 1.5.1