From 54172924c834a954fcbbc6224318140b9e95aa7d Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 1 Mar 2016 14:32:56 +0000 Subject: Load the current id in the IdGenerator constructor Rather than loading them lazily. This allows us to remove all the yield statements and spurious arguments for the get_next methods. It also allows us to replace all instances of get_next_txn with get_next since get_next no longer needs to access the db. --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 372b540002..8ed8a21b0a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -83,7 +83,7 @@ class StateStore(SQLBaseStore): if event.is_state(): state_events[(event.type, event.state_key)] = event - state_group = self._state_groups_id_gen.get_next_txn(txn) + state_group = self._state_groups_id_gen.get_next() self._simple_insert_txn( txn, table="state_groups", -- cgit 1.4.1 From 2c86187a1bd61901ecf4adca3dcced9f68cf26de Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 11:59:31 +0000 Subject: Don't cache events in _state_group_cache Instead, simply cache the event ids, relying on the event cache to cache the actual events. The problem was that while the state groups cache was limited in the number of groups it could hold, each individual group could consist of thousands of events. --- synapse/storage/state.py | 108 +++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 60 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 8ed8a21b0a..f06c734c4e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -172,7 +172,7 @@ class StateStore(SQLBaseStore): defer.returnValue(events) def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> state event ids + """Returns dictionary state_group -> (dict of (type, state_key) -> event id) """ def f(txn, groups): if types is not None: @@ -183,7 +183,8 @@ class StateStore(SQLBaseStore): where_clause = "" sql = ( - "SELECT state_group, event_id FROM state_groups_state WHERE" + "SELECT state_group, event_id, type, state_key" + " FROM state_groups_state WHERE" " state_group IN (%s) %s" % ( ",".join("?" for _ in groups), where_clause, @@ -199,7 +200,8 @@ class StateStore(SQLBaseStore): results = {} for row in rows: - results.setdefault(row["state_group"], []).append(row["event_id"]) + key = (row["type"], row["state_key"]) + results.setdefault(row["state_group"], {})[key] = row["event_id"] return results chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)] @@ -296,7 +298,7 @@ class StateStore(SQLBaseStore): where a `state_key` of `None` matches all state_keys for the `type`. 
""" - is_all, state_dict = self._state_group_cache.get(group) + is_all, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() @@ -308,7 +310,7 @@ class StateStore(SQLBaseStore): if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) - if (typ, state_key) not in state_dict: + if (typ, state_key) not in state_dict_ids: missing_types.add((typ, state_key)) sentinel = object() @@ -326,7 +328,7 @@ class StateStore(SQLBaseStore): got_all = not (missing_types or types is None) return { - k: v for k, v in state_dict.items() + k: v for k, v in state_dict_ids.items() if include(k[0], k[1]) }, missing_types, got_all @@ -340,8 +342,9 @@ class StateStore(SQLBaseStore): Args: group: The state group to lookup """ - is_all, state_dict = self._state_group_cache.get(group) - return state_dict, is_all + is_all, state_dict_ids = self._state_group_cache.get(group) + + return state_dict_ids, is_all @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): @@ -354,84 +357,69 @@ class StateStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict, missing_types, got_all = self._get_some_state_from_cache( + state_dict_ids, missing_types, got_all = self._get_some_state_from_cache( group, types ) - results[group] = state_dict + results[group] = state_dict_ids if not got_all: missing_groups.append(group) else: for group in set(groups): - state_dict, got_all = self._get_all_state_from_cache( + state_dict_ids, got_all = self._get_all_state_from_cache( group ) - results[group] = state_dict + + results[group] = state_dict_ids if not got_all: missing_groups.append(group) - if not missing_groups: - defer.returnValue({ - group: { - type_tuple: event - for type_tuple, event in state.items() - if event - } - for group, state in results.items() - }) + if missing_groups: + # Okay, so we have some missing_types, lets fetch them. + cache_seq_num = self._state_group_cache.sequence - # Okay, so we have some missing_types, lets fetch them. - cache_seq_num = self._state_group_cache.sequence + group_to_state_dict = yield self._get_state_groups_from_groups( + missing_groups, types + ) - group_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types - ) + # Now we want to update the cache with all the things we fetched + # from the database. + for group, group_state_dict in group_to_state_dict.items(): + if types: + # We delibrately put key -> None mappings into the cache to + # cache absence of the key, on the assumption that if we've + # explicitly asked for some types then we will probably ask + # for them again. + state_dict = {key: None for key in types} + state_dict.update(results[group]) + results[group] = state_dict + else: + state_dict = results[group] + + state_dict.update(group_state_dict) + + self._state_group_cache.update( + cache_seq_num, + key=group, + value=state_dict, + full=(types is None), + ) state_events = yield self._get_events( - [e_id for l in group_state_dict.values() for e_id in l], + [ev_id for sd in results.values() for ev_id in sd.values()], get_prev_content=False ) state_events = {e.event_id: e for e in state_events} - # Now we want to update the cache with all the things we fetched - # from the database. 
- for group, state_ids in group_state_dict.items(): - if types: - # We delibrately put key -> None mappings into the cache to - # cache absence of the key, on the assumption that if we've - # explicitly asked for some types then we will probably ask - # for them again. - state_dict = {key: None for key in types} - state_dict.update(results[group]) - results[group] = state_dict - else: - state_dict = results[group] - - for event_id in state_ids: - try: - state_event = state_events[event_id] - state_dict[(state_event.type, state_event.state_key)] = state_event - except KeyError: - # Hmm. So we do don't have that state event? Interesting. - logger.warn( - "Can't find state event %r for state group %r", - event_id, group, - ) - - self._state_group_cache.update( - cache_seq_num, - key=group, - value=state_dict, - full=(types is None), - ) - # Remove all the entries with None values. The None values were just # used for bookkeeping in the cache. for group, state_dict in results.items(): results[group] = { - key: event for key, event in state_dict.items() if event + key: state_events[event_id] + for key, event_id in state_dict.items() + if event_id and event_id in state_events } defer.returnValue(results) -- cgit 1.4.1 From 8b0dfc9fc4e98ea9c029c5f7b49efaf011ce1979 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 11:37:58 +0000 Subject: Don't cache events in get_current_state_for_key --- synapse/storage/events.py | 4 ++-- synapse/storage/state.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index e444b64cee..7bcddf1f31 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -157,7 +157,7 @@ class EventsStore(SQLBaseStore): # We purposefully do this first since if we include a `current_state` # key, we *want* to update the `current_state_events` table if current_state: - txn.call_after(self.get_current_state_for_key.invalidate_all) + txn.call_after(self._get_current_state_for_key.invalidate_all) txn.call_after(self.get_rooms_for_user.invalidate_all) txn.call_after(self.get_users_in_room.invalidate, (event.room_id,)) txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,)) @@ -441,7 +441,7 @@ class EventsStore(SQLBaseStore): for event, _ in state_events_and_contexts: if not context.rejected: txn.call_after( - self.get_current_state_for_key.invalidate, + self._get_current_state_for_key.invalidate, (event.room_id, event.type, event.state_key,) ) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f06c734c4e..eab2c5a8ce 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -14,9 +14,7 @@ # limitations under the License. 
from ._base import SQLBaseStore -from synapse.util.caches.descriptors import ( - cached, cachedInlineCallbacks, cachedList -) +from synapse.util.caches.descriptors import cached, cachedList from twisted.internet import defer @@ -155,8 +153,14 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - @cachedInlineCallbacks(num_args=3) + @defer.inlineCallbacks def get_current_state_for_key(self, room_id, event_type, state_key): + event_ids = yield self._get_current_state_for_key(room_id, event_type, state_key) + events = yield self._get_events(event_ids, get_prev_content=False) + defer.returnValue(events) + + @cached(num_args=3) + def _get_current_state_for_key(self, room_id, event_type, state_key): def f(txn): sql = ( "SELECT event_id FROM current_state_events" @@ -167,9 +171,7 @@ class StateStore(SQLBaseStore): txn.execute(sql, args) results = txn.fetchall() return [r[0] for r in results] - event_ids = yield self.runInteraction("get_current_state_for_key", f) - events = yield self._get_events(event_ids, get_prev_content=False) - defer.returnValue(events) + return self.runInteraction("get_current_state_for_key", f) def _get_state_groups_from_groups(self, groups, types): """Returns dictionary state_group -> (dict of (type, state_key) -> event id) -- cgit 1.4.1 From 75daede92f041500347a5f446229be5ca50c2b8e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 18:22:52 +0000 Subject: String intern --- synapse/storage/state.py | 12 +++++++++--- synapse/util/caches/__init__.py | 8 ++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index eab2c5a8ce..1982b1c607 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -15,6 +15,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.caches import intern_string from twisted.internet import defer @@ -155,7 +156,9 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def get_current_state_for_key(self, room_id, event_type, state_key): - event_ids = yield self._get_current_state_for_key(room_id, event_type, state_key) + event_ids = yield self._get_current_state_for_key( + room_id, intern_string(event_type), intern_string(state_key) + ) events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) @@ -202,7 +205,7 @@ class StateStore(SQLBaseStore): results = {} for row in rows: - key = (row["type"], row["state_key"]) + key = (intern_string(row["type"]), intern_string(row["state_key"])) results.setdefault(row["state_group"], {})[key] = row["event_id"] return results @@ -393,7 +396,10 @@ class StateStore(SQLBaseStore): # cache absence of the key, on the assumption that if we've # explicitly asked for some types then we will probably ask # for them again. - state_dict = {key: None for key in types} + state_dict = { + (intern_string(etype), intern_string(state_key)): None + for (etype, state_key) in types + } state_dict.update(results[group]) results[group] = state_dict else: diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 1a14904194..9d450fade5 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. 
import synapse.metrics
+from lrucache import LruCache
DEBUG_CACHES = False
@@ -25,3 +26,10 @@ cache_counter = metrics.register_cache(
lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()},
labels=["name"],
)
+
+_string_cache = LruCache(5000)
+caches_by_name["string_cache"] = _string_cache
+
+
+def intern_string(string):
+    return _string_cache.setdefault(string, string)
-- cgit 1.4.1
From acdfef7b1443a8260c43e31e9944b74dfdf286dc Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 23 Mar 2016 16:13:05 +0000
Subject: Intern all the things
---
synapse/events/__init__.py | 11 ++-----
synapse/federation/federation_client.py | 1 +
synapse/federation/transport/server.py | 28 ++++++++--------
synapse/http/server.py | 10 +++---
synapse/storage/_base.py | 3 +-
synapse/storage/receipts.py | 21 ++++++------
synapse/storage/state.py | 10 +++---
synapse/util/caches/__init__.py | 58 ++++++++++++++++++++++++++++++++-
8 files changed, 97 insertions(+), 45 deletions(-)
(limited to 'synapse/storage/state.py')
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 63004eaf04..23f8b612ae 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -14,7 +14,7 @@
# limitations under the License.
from synapse.util.frozenutils import freeze
-from synapse.util.caches import intern_string
+from synapse.util.caches import intern_dict
# Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents
@@ -143,14 +143,7 @@ class FrozenEvent(EventBase):
# We intern these strings because they turn up a lot (especially when
# caching).
- event_dict["type"] = intern_string(event_dict["type"])
- if "state_key" in event_dict:
- event_dict["state_key"] = intern_string(event_dict["state_key"])
- if "sender" in event_dict:
- event_dict["sender"] = intern_string(event_dict["sender"])
-
- event_dict["event_id"] = intern(event_dict["event_id"].encode('ascii'))
- event_dict["room_id"] = intern(event_dict["room_id"].encode('ascii'))
+ event_dict = intern_dict(event_dict)
if USE_FROZEN_DICTS:
frozen_dict = freeze(event_dict)
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 83c1f46586..37ee469fa2 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -418,6 +418,7 @@ class FederationClient(FederationBase):
"Failed to make_%s via %s: %s",
membership, destination, e.message
)
+ raise
raise RuntimeError("Failed to send to any server.")
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 208bff8d4f..d65a7893d8 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -175,7 +175,7 @@ class BaseFederationServlet(object):
class FederationSendServlet(BaseFederationServlet):
- PATH = "/send/([^/]*)/"
+ PATH = "/send/(?P<transaction_id>[^/]*)/"
def __init__(self, handler, server_name, **kwargs):
super(FederationSendServlet, self).__init__(
@@ -250,7 +250,7 @@ class FederationPullServlet(BaseFederationServlet):
class FederationEventServlet(BaseFederationServlet):
- PATH = "/event/([^/]*)/"
+ PATH = "/event/(?P<event_id>[^/]*)/"
# This is when someone asks for a data item for a given server data_id pair.
def on_GET(self, origin, content, query, event_id):
@@ -258,7 +258,7 @@ class FederationEventServlet(BaseFederationServlet):
class FederationStateServlet(BaseFederationServlet):
- PATH = "/state/([^/]*)/"
+ PATH = "/state/(?P<context>[^/]*)/"
# This is when someone asks for all data for a given context.
def on_GET(self, origin, content, query, context):
@@ -270,7 +270,7 @@ class FederationStateServlet(BaseFederationServlet):
class FederationBackfillServlet(BaseFederationServlet):
- PATH = "/backfill/([^/]*)/"
+ PATH = "/backfill/(?P<context>[^/]*)/"
def on_GET(self, origin, content, query, context):
versions = query["v"]
@@ -285,7 +285,7 @@ class FederationBackfillServlet(BaseFederationServlet):
class FederationQueryServlet(BaseFederationServlet):
- PATH = "/query/([^/]*)"
+ PATH = "/query/(?P<query_type>[^/]*)"
# This is when we receive a server-server Query
def on_GET(self, origin, content, query, query_type):
@@ -296,7 +296,7 @@ class FederationQueryServlet(BaseFederationServlet):
class FederationMakeJoinServlet(BaseFederationServlet):
- PATH = "/make_join/([^/]*)/([^/]*)"
+ PATH = "/make_join/(?P<context>[^/]*)/(?P<user_id>[^/]*)"
@defer.inlineCallbacks
def on_GET(self, origin, content, query, context, user_id):
@@ -305,7 +305,7 @@ class FederationMakeJoinServlet(BaseFederationServlet):
class FederationMakeLeaveServlet(BaseFederationServlet):
- PATH = "/make_leave/([^/]*)/([^/]*)"
+ PATH = "/make_leave/(?P<context>[^/]*)/(?P<user_id>[^/]*)"
@defer.inlineCallbacks
def on_GET(self, origin, content, query, context, user_id):
@@ -314,7 +314,7 @@ class FederationMakeLeaveServlet(BaseFederationServlet):
class FederationSendLeaveServlet(BaseFederationServlet):
- PATH = "/send_leave/([^/]*)/([^/]*)"
+ PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<txid>[^/]*)"
@defer.inlineCallbacks
def on_PUT(self, origin, content, query, room_id, txid):
@@ -323,14 +323,14 @@ class FederationSendLeaveServlet(BaseFederationServlet):
class FederationEventAuthServlet(BaseFederationServlet):
- PATH = "/event_auth/([^/]*)/([^/]*)"
+ PATH = "/event_auth(?P<context>[^/]*)/(?P<event_id>[^/]*)"
def on_GET(self, origin, content, query, context, event_id):
return self.handler.on_event_auth(origin, context, event_id)
class FederationSendJoinServlet(BaseFederationServlet):
- PATH = "/send_join/([^/]*)/([^/]*)"
+ PATH = "/send_join/(?P<context>[^/]*)/(?P<event_id>[^/]*)"
@defer.inlineCallbacks
def on_PUT(self, origin, content, query, context, event_id):
@@ -341,7 +341,7 @@ class FederationSendJoinServlet(BaseFederationServlet):
class FederationInviteServlet(BaseFederationServlet):
- PATH = "/invite/([^/]*)/([^/]*)"
+ PATH = "/invite/(?P<context>[^/]*)/(?P<event_id>[^/]*)"
@defer.inlineCallbacks
def on_PUT(self, origin, content, query, context, event_id):
@@ -352,7 +352,7 @@ class FederationInviteServlet(BaseFederationServlet):
class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet):
- PATH = "/exchange_third_party_invite/([^/]*)"
+ PATH = "/exchange_third_party_invite/(?P<room_id>[^/]*)"
@defer.inlineCallbacks
def on_PUT(self, origin, content, query, room_id):
@@ -381,7 +381,7 @@ class FederationClientKeysClaimServlet(BaseFederationServlet):
class FederationQueryAuthServlet(BaseFederationServlet):
- PATH = "/query_auth/([^/]*)/([^/]*)"
+ PATH = "/query_auth/(?P<context>[^/]*)/(?P<event_id>[^/]*)"
@defer.inlineCallbacks
def on_POST(self, origin, content, query, context, event_id):
@@ -394,7 +394,7 @@ class FederationQueryAuthServlet(BaseFederationServlet):
class FederationGetMissingEventsServlet(BaseFederationServlet):
# TODO(paul): Why does this path alone end with "/?" optional?
- PATH = "/get_missing_events/([^/]*)/?"
+ PATH = "/get_missing_events/(?P<room_id>[^/]*)/?"
@defer.inlineCallbacks def on_POST(self, origin, content, query, room_id): diff --git a/synapse/http/server.py b/synapse/http/server.py index b17b190ee5..b82196fd5e 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -18,6 +18,7 @@ from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes ) from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.caches import intern_dict import synapse.metrics import synapse.events @@ -229,11 +230,12 @@ class JsonResource(HttpServer, resource.Resource): else: servlet_classname = "%r" % callback - args = [ - urllib.unquote(u).decode("UTF-8") if u else u for u in m.groups() - ] + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in m.groupdict().items() + }) - callback_return = yield callback(request, *args) + callback_return = yield callback(request, **kwargs) if callback_return is not None: code, response = callback_return self._send_response(request, code, response) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 583b77a835..b75b79df36 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import StoreError from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.caches.descriptors import Cache +from synapse.util.caches import intern_dict import synapse.metrics @@ -350,7 +351,7 @@ class SQLBaseStore(object): """ col_headers = list(column[0] for column in cursor.description) results = list( - dict(zip(col_headers, row)) for row in cursor.fetchall() + intern_dict(dict(zip(col_headers, row))) for row in cursor.fetchall() ) return results diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index dbc074d6b5..6b9d848eaa 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -62,18 +62,17 @@ class ReceiptsStore(SQLBaseStore): @cachedInlineCallbacks(num_args=2) def get_receipts_for_user(self, user_id, receipt_type): - def f(txn): - sql = ( - "SELECT room_id,event_id " - "FROM receipts_linearized " - "WHERE user_id = ? AND receipt_type = ? 
" - ) - txn.execute(sql, (user_id, receipt_type)) - return txn.fetchall() + rows = yield self._simple_select_list( + table="receipts_linearized", + keyvalues={ + "user_id": user_id, + "receipt_type": receipt_type, + }, + retcols=("room_id", "event_id"), + desc="get_receipts_for_user", + ) - defer.returnValue(dict( - (yield self.runInteraction("get_receipts_for_user", f)) - )) + defer.returnValue({row["room_id"]: row["event_id"] for row in rows}) @defer.inlineCallbacks def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1982b1c607..03eecbbbb6 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -156,9 +156,7 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def get_current_state_for_key(self, room_id, event_type, state_key): - event_ids = yield self._get_current_state_for_key( - room_id, intern_string(event_type), intern_string(state_key) - ) + event_ids = yield self._get_current_state_for_key(room_id, event_type, state_key) events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) @@ -205,7 +203,7 @@ class StateStore(SQLBaseStore): results = {} for row in rows: - key = (intern_string(row["type"]), intern_string(row["state_key"])) + key = (row["type"], row["state_key"]) results.setdefault(row["state_group"], {})[key] = row["event_id"] return results @@ -286,7 +284,9 @@ class StateStore(SQLBaseStore): desc="_get_state_group_for_events", ) - defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) + defer.returnValue({ + intern(row["event_id"].encode('ascii')): row["state_group"] for row in rows + }) def _get_some_state_from_cache(self, group, types): """Checks if group is in cache. 
See `_get_state_for_groups` diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 9d450fade5..838cec45fe 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -15,6 +15,9 @@ import synapse.metrics from lrucache import LruCache +import os + +CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) DEBUG_CACHES = False @@ -27,9 +30,62 @@ cache_counter = metrics.register_cache( labels=["name"], ) -_string_cache = LruCache(5000) +_string_cache = LruCache(int(5000 * CACHE_SIZE_FACTOR)) caches_by_name["string_cache"] = _string_cache +KNOWN_KEYS = { + key: key for key in + ( + "auth_events", + "content", + "depth", + "event_id", + "hashes", + "origin", + "origin_server_ts", + "prev_events", + "room_id", + "sender", + "signatures", + "state_key", + "type", + "unsigned", + "user_id", + ) +} + + def intern_string(string): + """Takes a (potentially) unicode string and interns using custom cache + """ return _string_cache.setdefault(string, string) + + +def intern_dict(dictionary): + """Takes a dictionary and interns well known keys and their values + """ + return _intern_known_values({ + _intern_key(key): value for key, value in dictionary.items() + }) + + +def _intern_known_values(dictionary): + intern_str_keys = ("event_id", "room_id") + intern_unicode_keys = ("sender", "user_id", "type", "state_key") + + for key in intern_str_keys: + val = dictionary.get(key, None) + if val is not None: + dictionary[key] = intern(val.encode('ascii')) + + for key in intern_unicode_keys: + val = dictionary.get(key, None) + if val is not None: + dictionary[key] = intern_string(val) + + return dictionary + + +def _intern_key(key): + return KNOWN_KEYS.get(key, key) -- cgit 1.4.1 From 2f0180b09e2a2afeed418a5840ae6b4fffcb4be4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 16:29:46 +0000 Subject: Don't bother interning keys that are already interned --- synapse/storage/state.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 03eecbbbb6..02cefdff26 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -284,9 +284,7 @@ class StateStore(SQLBaseStore): desc="_get_state_group_for_events", ) - defer.returnValue({ - intern(row["event_id"].encode('ascii')): row["state_group"] for row in rows - }) + defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) def _get_some_state_from_cache(self, group, types): """Checks if group is in cache. See `_get_state_for_groups` -- cgit 1.4.1
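
Taken together, the commits above boil down to two ideas: keep the per-group state cache small by storing only (type, state_key) -> event_id mappings and let the separate event cache own the actual event objects; and intern frequently repeated strings so that thousands of cache entries share a single object. The following is a minimal, self-contained sketch of that pattern, not Synapse's real classes or APIs: BoundedCache, cache_group, get_state_for_group and the dict-backed intern_string are illustrative stand-ins for LruCache, DictionaryCache and the helpers added in these patches.

import collections

_string_cache = {}


def intern_string(s):
    # Return a canonical copy of `s` so equal strings end up sharing one
    # object (the real patch uses a bounded LruCache here, not a plain dict).
    return _string_cache.setdefault(s, s)


class BoundedCache(object):
    """Tiny bounded mapping standing in for Synapse's LruCache.

    Eviction here is oldest-inserted-first; a real LRU would also
    re-order entries on access.
    """

    def __init__(self, max_size):
        self.max_size = max_size
        self._data = collections.OrderedDict()

    def get(self, key, default=None):
        return self._data.get(key, default)

    def set(self, key, value):
        self._data[key] = value
        while len(self._data) > self.max_size:
            self._data.popitem(last=False)


# One cache per concern: the state-group cache maps
#     state_group -> {(type, state_key): event_id}
# while the separate, separately size-limited event cache owns the full
# event objects.
state_group_cache = BoundedCache(max_size=1000)
event_cache = BoundedCache(max_size=10000)


def cache_group(state_group, event_id_by_key):
    # Intern the (type, state_key) strings so many groups share the same keys.
    state_group_cache.set(state_group, {
        (intern_string(typ), intern_string(state_key)): event_id
        for (typ, state_key), event_id in event_id_by_key.items()
    })


def get_state_for_group(state_group):
    """Resolve a cached group of event ids back into event objects."""
    state_ids = state_group_cache.get(state_group, {})
    resolved = {}
    for key, event_id in state_ids.items():
        event = event_cache.get(event_id) if event_id is not None else None
        if event is not None:
            resolved[key] = event
    return resolved

The payoff is the one described in the commit messages: evicting a state group now drops only a small dict of ids, while the memory used by event objects is bounded once, by the event cache, no matter how many groups reference them.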