From 61959928bb4b6b0191d301f1b267af7290a61bd2 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 19 Feb 2015 14:58:07 +0000 Subject: Pull out the 'get_rooms_for_user' cache logic into a reĆ¼sable @cached decorator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- synapse/storage/roommember.py | 53 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'synapse/storage/roommember.py') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 9bf608bc90..569bd55d0f 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -33,6 +33,32 @@ RoomsForUser = namedtuple( ) +# TODO(paul): +# * Move this somewhere higher-level, shared; +# * more generic key management +# * export monitoring stats +# * maximum size; just evict things at random, or consider LRU? +def cached(orig): + cache = {} + + @defer.inlineCallbacks + def wrapped(self, key): + if key in cache: + defer.returnValue(cache[key]) + + ret = yield orig(self, key) + + cache[key] = ret; + defer.returnValue(ret) + + def invalidate(key): + if key in cache: + del cache[key] + + wrapped.invalidate = invalidate + return wrapped + + class RoomMemberStore(SQLBaseStore): def __init__(self, *args, **kw): @@ -103,7 +129,7 @@ class RoomMemberStore(SQLBaseStore): txn.execute(sql, (event.room_id, domain)) - self.invalidate_rooms_for_user(target_user_id) + self.get_rooms_for_user.invalidate(target_user_id) @defer.inlineCallbacks def get_room_member(self, user_id, room_id): @@ -247,33 +273,12 @@ class RoomMemberStore(SQLBaseStore): results = self._parse_events_txn(txn, rows) return results - # TODO(paul): Create a nice @cached decorator to do this - # @cached - # def get_foo(...) - # ... - # invalidate_foo = get_foo.invalidator - - @defer.inlineCallbacks + @cached def get_rooms_for_user(self, user_id): - # TODO(paul): put some performance counters in here so we can easily - # track what impact this cache is having - if user_id in self._user_rooms_cache: - defer.returnValue(self._user_rooms_cache[user_id]) - - rooms = yield self.get_rooms_for_user_where_membership_is( + return self.get_rooms_for_user_where_membership_is( user_id, membership_list=[Membership.JOIN], ) - # TODO(paul): Consider applying a maximum size; just evict things at - # random, or consider LRU? - - self._user_rooms_cache[user_id] = rooms - defer.returnValue(rooms) - - def invalidate_rooms_for_user(self, user_id): - if user_id in self._user_rooms_cache: - del self._user_rooms_cache[user_id] - @defer.inlineCallbacks def user_rooms_intersect(self, user_id_list): """ Checks whether all the users whose IDs are given in a list share a -- cgit 1.4.1 From 077d20034278ea57c57d501de11bfb1f0c7f9603 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 19 Feb 2015 17:29:39 +0000 Subject: Move @cached decorator out into synapse.storage._base; add minimal docs --- synapse/storage/_base.py | 35 +++++++++++++++++++++++++++++++++++ synapse/storage/roommember.py | 28 +--------------------------- 2 files changed, 36 insertions(+), 27 deletions(-) (limited to 'synapse/storage/roommember.py') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index be9934c66f..fd275039be 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -35,6 +35,41 @@ sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") +# TODO(paul): +# * Move this somewhere higher-level, shared; +# * more generic key management +# * export monitoring stats +# * maximum size; just evict things at random, or consider LRU? +def cached(orig): + """ A method decorator that applies a memoizing cache around the function. + + The function is presumed to take one additional argument, which is used as + the key for the cache. Cache hits are served directly from the cache; + misses use the function body to generate the value. + + The wrapped function has an additional member, a callable called + "invalidate". This can be used to remove individual entries from the cache. + """ + cache = {} + + @defer.inlineCallbacks + def wrapped(self, key): + if key in cache: + defer.returnValue(cache[key]) + + ret = yield orig(self, key) + + cache[key] = ret; + defer.returnValue(ret) + + def invalidate(key): + if key in cache: + del cache[key] + + wrapped.invalidate = invalidate + return wrapped + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging to the .execute() method.""" diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 569bd55d0f..b8fcc1927e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -17,7 +17,7 @@ from twisted.internet import defer from collections import namedtuple -from ._base import SQLBaseStore +from ._base import SQLBaseStore, cached from synapse.api.constants import Membership from synapse.types import UserID @@ -33,32 +33,6 @@ RoomsForUser = namedtuple( ) -# TODO(paul): -# * Move this somewhere higher-level, shared; -# * more generic key management -# * export monitoring stats -# * maximum size; just evict things at random, or consider LRU? -def cached(orig): - cache = {} - - @defer.inlineCallbacks - def wrapped(self, key): - if key in cache: - defer.returnValue(cache[key]) - - ret = yield orig(self, key) - - cache[key] = ret; - defer.returnValue(ret) - - def invalidate(key): - if key in cache: - del cache[key] - - wrapped.invalidate = invalidate - return wrapped - - class RoomMemberStore(SQLBaseStore): def __init__(self, *args, **kw): -- cgit 1.4.1 From ebc3db295bfe6d0c43bf45b8fcd7fa6bbc429375 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 19 Feb 2015 18:36:02 +0000 Subject: Take named arguments to @cached() decorator, add a 'max_entries' limit --- synapse/storage/_base.py | 39 +++++++++++-------- synapse/storage/roommember.py | 2 +- tests/storage/test__base.py | 89 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 16 deletions(-) create mode 100644 tests/storage/test__base.py (limited to 'synapse/storage/roommember.py') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index fd275039be..61657d36ed 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -39,8 +39,8 @@ transaction_logger = logging.getLogger("synapse.storage.txn") # * Move this somewhere higher-level, shared; # * more generic key management # * export monitoring stats -# * maximum size; just evict things at random, or consider LRU? -def cached(orig): +# * consider other eviction strategies - LRU? +def cached(max_entries=1000): """ A method decorator that applies a memoizing cache around the function. The function is presumed to take one additional argument, which is used as @@ -50,24 +50,33 @@ def cached(orig): The wrapped function has an additional member, a callable called "invalidate". This can be used to remove individual entries from the cache. """ - cache = {} + def wrap(orig): + cache = {} - @defer.inlineCallbacks - def wrapped(self, key): - if key in cache: - defer.returnValue(cache[key]) + @defer.inlineCallbacks + def wrapped(self, key): + if key in cache: + defer.returnValue(cache[key]) + + ret = yield orig(self, key) + + while len(cache) > max_entries: + # TODO(paul): This feels too biased. However, a random index + # would be a bit inefficient, walking the list of keys just + # to ignore most of them? + del cache[cache.keys()[0]] - ret = yield orig(self, key) + cache[key] = ret; + defer.returnValue(ret) - cache[key] = ret; - defer.returnValue(ret) + def invalidate(key): + if key in cache: + del cache[key] - def invalidate(key): - if key in cache: - del cache[key] + wrapped.invalidate = invalidate + return wrapped - wrapped.invalidate = invalidate - return wrapped + return wrap class LoggingTransaction(object): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index b8fcc1927e..33a832483e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -247,7 +247,7 @@ class RoomMemberStore(SQLBaseStore): results = self._parse_events_txn(txn, rows) return results - @cached + @cached() def get_rooms_for_user(self, user_id): return self.get_rooms_for_user_where_membership_is( user_id, membership_list=[Membership.JOIN], diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py new file mode 100644 index 0000000000..057f798640 --- /dev/null +++ b/tests/storage/test__base.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from tests import unittest +from twisted.internet import defer + +from synapse.storage._base import cached + + +class CacheDecoratorTestCase(unittest.TestCase): + + @defer.inlineCallbacks + def test_passthrough(self): + @cached() + def func(self, key): + return key + + self.assertEquals((yield func(self, "foo")), "foo") + self.assertEquals((yield func(self, "bar")), "bar") + + @defer.inlineCallbacks + def test_hit(self): + callcount = [0] + + @cached() + def func(self, key): + callcount[0] += 1 + return key + + yield func(self, "foo") + + self.assertEquals(callcount[0], 1) + + self.assertEquals((yield func(self, "foo")), "foo") + self.assertEquals(callcount[0], 1) + + @defer.inlineCallbacks + def test_invalidate(self): + callcount = [0] + + @cached() + def func(self, key): + callcount[0] += 1 + return key + + yield func(self, "foo") + + self.assertEquals(callcount[0], 1) + + func.invalidate("foo") + + yield func(self, "foo") + + self.assertEquals(callcount[0], 2) + + @defer.inlineCallbacks + def test_max_entries(self): + callcount = [0] + + @cached(max_entries=10) + def func(self, key): + callcount[0] += 1 + return key + + for k in range(0,12): + yield func(self, k) + + self.assertEquals(callcount[0], 12) + + # There must have been at least 2 evictions, meaning if we calculate + # all 12 values again, we must get called at least 2 more times + for k in range(0,12): + yield func(self, k) + + self.assertTrue(callcount[0] >= 14, + msg="Expected callcount >= 14, got %d" % (callcount[0])) -- cgit 1.4.1 From 357fba2c24067796ce89f25636a2541bc9a10752 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Mon, 23 Feb 2015 15:57:41 +0000 Subject: RoomMemberStore no longer needs a _user_rooms_cache member --- synapse/storage/roommember.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'synapse/storage/roommember.py') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 33a832483e..58aa376c20 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -35,11 +35,6 @@ RoomsForUser = namedtuple( class RoomMemberStore(SQLBaseStore): - def __init__(self, *args, **kw): - super(RoomMemberStore, self).__init__(*args, **kw) - - self._user_rooms_cache = {} - def _store_room_member_txn(self, txn, event): """Store a room member in the database. """ -- cgit 1.4.1 From 377ae369c1275fabdac46fa00c0b2ba238467435 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 11:20:51 +0000 Subject: Wrap all of get_app_service_rooms in a txn. --- synapse/storage/appservice.py | 38 +++++++++++++++++++++++----------- synapse/storage/directory.py | 21 ------------------- synapse/storage/registration.py | 4 ---- synapse/storage/room.py | 10 --------- synapse/storage/roommember.py | 36 +++++++++++++++++--------------- synapse/storage/stream.py | 46 ++++++++++++++++++++--------------------- 6 files changed, 67 insertions(+), 88 deletions(-) (limited to 'synapse/storage/roommember.py') diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 3a267d0442..97481d113b 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -15,6 +15,7 @@ import logging from twisted.internet import defer +from synapse.api.constants import Membership from synapse.api.errors import StoreError from synapse.appservice import ApplicationService from synapse.storage.roommember import RoomsForUser @@ -197,7 +198,6 @@ class ApplicationServiceStore(SQLBaseStore): # TODO: The from_cache=False impl # TODO: This should be JOINed with the application_services_regex table. - @defer.inlineCallbacks def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. @@ -212,35 +212,49 @@ class ApplicationServiceStore(SQLBaseStore): Returns: A list of RoomsForUser. """ - # FIXME: This is assuming that this store has methods from - # RoomStore, DirectoryStore, RegistrationStore, RoomMemberStore which is - # a bad assumption to make as it makes testing trickier and coupling - # less obvious. + return self.runInteraction( + "get_app_service_rooms", + self._get_app_service_rooms_txn, + service, + ) + def _get_app_service_rooms_txn(self, txn, service): # get all rooms matching the room ID regex. - room_entries = yield self.get_all_rooms() + room_entries = self._simple_select_list_txn( + txn=txn, table="rooms", keyvalues=None, retcols=["room_id"] + ) matching_room_list = set([ r["room_id"] for r in room_entries if service.is_interested_in_room(r["room_id"]) ]) # resolve room IDs for matching room alias regex. - room_alias_mappings = yield self.get_all_associations() + room_alias_mappings = self._simple_select_list_txn( + txn=txn, table="room_aliases", keyvalues=None, + retcols=["room_id", "room_alias"] + ) matching_room_list |= set([ - r.room_id for r in room_alias_mappings if - service.is_interested_in_alias(r.room_alias) + r["room_id"] for r in room_alias_mappings if + service.is_interested_in_alias(r["room_alias"]) ]) # get all rooms for every user for this AS. This is scoped to users on # this HS only. - user_list = yield self.get_all_users() + user_list = self._simple_select_list_txn( + txn=txn, table="users", keyvalues=None, retcols=["name"] + ) user_list = [ u["name"] for u in user_list if service.is_interested_in_user(u["name"]) ] rooms_for_user_matching_user_id = set() # RoomsForUser list for user_id in user_list: - rooms_for_user = yield self.get_rooms_for_user(user_id) + # FIXME: This assumes this store is linked with RoomMemberStore :( + rooms_for_user = self._get_rooms_for_user_where_membership_is_txn( + txn=txn, + user_id=user_id, + membership_list=[Membership.JOIN] + ) rooms_for_user_matching_user_id |= set(rooms_for_user) # make RoomsForUser tuples for room ids and aliases which are not in the @@ -253,7 +267,7 @@ class ApplicationServiceStore(SQLBaseStore): ] rooms_for_user_matching_user_id |= set(missing_rooms_for_user) - defer.returnValue(rooms_for_user_matching_user_id) + return rooms_for_user_matching_user_id @defer.inlineCallbacks def _populate_cache(self): diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index e391239a3c..68b7d59693 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -134,27 +134,6 @@ class DirectoryStore(SQLBaseStore): return room_id - @defer.inlineCallbacks - def get_all_associations(self): - """Retrieve the entire list of room alias -> room ID pairings. - - Returns: - A list of RoomAliasMappings. - """ - results = yield self._execute_and_decode( - "SELECT room_id, room_alias FROM room_aliases" - ) - - # TODO(kegan): It feels wrong to be specifying no servers here, but - # equally this function isn't required to obtain all servers so - # retrieving them "just for the sake of it" also seems wrong, but we - # want to conform to passing Objects around and not dicts.. - defer.returnValue([ - RoomAliasMapping( - room_id=r["room_id"], room_alias=r["room_alias"], servers="" - ) for r in results - ]) - def get_aliases_for_room(self, room_id): return self._simple_select_onecol( "room_aliases", diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 54cd15bc0e..029b07cc66 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -92,10 +92,6 @@ class RegistrationStore(SQLBaseStore): query, user_id ) - def get_all_users(self): - return self._simple_select_list( - table="users", keyvalues=None, retcols=["name"]) - def get_user_by_token(self, token): """Get a user from the given access token. diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 6bd0b22ae5..750b17a45f 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -71,16 +71,6 @@ class RoomStore(SQLBaseStore): RoomsTable.decode_single_result, query, room_id, ) - def get_all_rooms(self): - """Retrieve all the rooms. - - Returns: - A list of namedtuples containing the room information. - """ - return self._simple_select_list( - table="rooms", keyvalues=None, retcols=["room_id"] - ) - @defer.inlineCallbacks def get_rooms(self, is_public): """Retrieve a list of all public rooms. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 58aa376c20..3d0172d09b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -180,6 +180,14 @@ class RoomMemberStore(SQLBaseStore): if not membership_list: return defer.succeed(None) + return self.runInteraction( + "get_rooms_for_user_where_membership_is", + self._get_rooms_for_user_where_membership_is_txn, + user_id, membership_list + ) + + def _get_rooms_for_user_where_membership_is_txn(self, txn, user_id, + membership_list): where_clause = "user_id = ? AND (%s)" % ( " OR ".join(["membership = ?" for _ in membership_list]), ) @@ -187,24 +195,18 @@ class RoomMemberStore(SQLBaseStore): args = [user_id] args.extend(membership_list) - def f(txn): - sql = ( - "SELECT m.room_id, m.sender, m.membership" - " FROM room_memberships as m" - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id" - " WHERE %s" - ) % (where_clause,) - - txn.execute(sql, args) - return [ - RoomsForUser(**r) for r in self.cursor_to_dict(txn) - ] + sql = ( + "SELECT m.room_id, m.sender, m.membership" + " FROM room_memberships as m" + " INNER JOIN current_state_events as c" + " ON m.event_id = c.event_id" + " WHERE %s" + ) % (where_clause,) - return self.runInteraction( - "get_rooms_for_user_where_membership_is", - f - ) + txn.execute(sql, args) + return [ + RoomsForUser(**r) for r in self.cursor_to_dict(txn) + ] def get_joined_hosts_for_room(self, room_id): return self._simple_select_onecol( diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 865cb13e9e..09bc522210 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -146,18 +146,6 @@ class StreamStore(SQLBaseStore): defer.returnValue(([], to_key)) return - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is used - # for the Notifier listener rooms). We can't reasonably make a SQL - # query for these room IDs, so we'll pull all the events between from/to - # and filter in python. - rooms_for_as = yield self.get_app_service_rooms(service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - # select all the events between from/to with a sensible limit sql = ( "SELECT e.event_id, e.room_id, e.type, s.state_key, " @@ -169,20 +157,32 @@ class StreamStore(SQLBaseStore): "limit": limit } - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - def f(txn): + # pull out all the events between the tokens txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) + # Logic: + # - We want ALL events which match the AS room_id regex + # - We want ALL events which match the rooms represented by the AS + # room_alias regex + # - We want ALL events for rooms that AS users have joined. + # This is currently supported via get_app_service_rooms (which is + # used for the Notifier listener rooms). We can't reasonably make a + # SQL query for these room IDs, so we'll pull all the events between + # from/to and filter in python. + rooms_for_as = self._get_app_service_rooms_txn(txn, service) + room_ids_for_as = [r.room_id for r in rooms_for_as] + + def app_service_interested(row): + if row["room_id"] in room_ids_for_as: + return True + + if row["type"] == EventTypes.Member: + if service.is_interested_in_user(row.get("state_key")): + return True + return False + ret = self._get_events_txn( txn, # apply the filter on the room id list @@ -197,7 +197,6 @@ class StreamStore(SQLBaseStore): if rows: key = "s%d" % max(r["stream_ordering"] for r in rows) - else: # Assume we didn't get anything because there was nothing to # get. @@ -266,7 +265,6 @@ class StreamStore(SQLBaseStore): if rows: key = "s%d" % max(r["stream_ordering"] for r in rows) - else: # Assume we didn't get anything because there was nothing to # get. -- cgit 1.4.1 From cb97ea3ec236c23c745e59c3a857503dd8dc3410 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 11:23:46 +0000 Subject: PEP8 --- synapse/storage/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/roommember.py') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 3d0172d09b..65ffb4627f 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -187,7 +187,7 @@ class RoomMemberStore(SQLBaseStore): ) def _get_rooms_for_user_where_membership_is_txn(self, txn, user_id, - membership_list): + membership_list): where_clause = "user_id = ? AND (%s)" % ( " OR ".join(["membership = ?" for _ in membership_list]), ) -- cgit 1.4.1