From 6c28ac260c2ce4bf93737e53ea3297bff08924c7 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson
Date: Thu, 7 Jan 2016 04:26:29 +0000
Subject: copyrights

---
 synapse/storage/receipts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index a535063547..21cf88b3da 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright 2014, 2015 OpenMarket Ltd
+# Copyright 2014-2016 OpenMarket Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
--
cgit 1.4.1

From 5a7d1ecffcab7a94caf70471a2eec56eb868573c Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 19 Jan 2016 16:01:05 +0000
Subject: Add regex cache. Only calculate push actions for users that have
 sent read receipts, and are on that server

---
 synapse/handlers/_base.py                |  2 +-
 synapse/handlers/federation.py           |  2 +-
 synapse/push/action_generator.py         |  7 ++++---
 synapse/push/bulk_push_rule_evaluator.py | 15 ++++++++++-----
 synapse/push/push_rule_evaluator.py      | 20 +++++++++++++++++---
 synapse/server.py                        |  4 ++++
 synapse/storage/receipts.py              | 14 +++++++++++++-
 7 files changed, 50 insertions(+), 14 deletions(-)

(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py
index 2d1167296a..5c7617de44 100644
--- a/synapse/handlers/_base.py
+++ b/synapse/handlers/_base.py
@@ -266,7 +266,7 @@ class BaseHandler(object):
             event, context=context
         )

-        action_generator = ActionGenerator(self.store)
+        action_generator = ActionGenerator(self.hs)
         yield action_generator.handle_push_actions_for_event(
             event, self
         )
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 4b94940e99..6c19d6ae8c 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -245,7 +245,7 @@ class FederationHandler(BaseHandler):
             yield user_joined_room(self.distributor, user, event.room_id)

         if not backfilled and not event.internal_metadata.is_outlier():
-            action_generator = ActionGenerator(self.store)
+            action_generator = ActionGenerator(self.hs)
             yield action_generator.handle_push_actions_for_event(
                 event, self
             )
diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py
index 4cf94f6c61..1d2e558f9a 100644
--- a/synapse/push/action_generator.py
+++ b/synapse/push/action_generator.py
@@ -25,8 +25,9 @@ logger = logging.getLogger(__name__)


 class ActionGenerator:
-    def __init__(self, store):
-        self.store = store
+    def __init__(self, hs):
+        self.hs = hs
+        self.store = hs.get_datastore()
         # really we want to get all user ids and all profile tags too,
         # since we want the actions for each profile tag for every user and
         # also actions for a client with no profile tag for each user.
@@ -42,7 +43,7 @@ class ActionGenerator:
         )

         bulk_evaluator = yield bulk_push_rule_evaluator.evaluator_for_room_id(
-            event.room_id, self.store
+            event.room_id, self.hs, self.store
         )

         actions_by_user = yield bulk_evaluator.action_for_event_by_user(event, handler)
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index efd686fa6e..1000ae6301 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -36,6 +36,7 @@ def decode_rule_json(rule):
 @defer.inlineCallbacks
 def _get_rules(room_id, user_ids, store):
     rules_by_user = yield store.bulk_get_push_rules(user_ids)
+
     rules_by_user = {
         uid: baserules.list_with_base_rules([
             decode_rule_json(rule_list)
@@ -47,12 +48,16 @@ def _get_rules(room_id, user_ids, store):


 @defer.inlineCallbacks
-def evaluator_for_room_id(room_id, store):
-    users = yield store.get_users_in_room(room_id)
-    rules_by_user = yield _get_rules(room_id, users, store)
+def evaluator_for_room_id(room_id, hs, store):
+    results = yield store.get_receipts_for_room(room_id, "m.read")
+    user_ids = [
+        row["user_id"] for row in results
+        if hs.is_mine_id(row["user_id"])
+    ]
+    rules_by_user = yield _get_rules(room_id, user_ids, store)

     defer.returnValue(BulkPushRuleEvaluator(
-        room_id, rules_by_user, users, store
+        room_id, rules_by_user, user_ids, store
     ))


@@ -129,7 +134,7 @@ def _condition_checker(evaluator, conditions, uid, display_name, cache):
         res = evaluator.matches(cond, uid, display_name, None)
         if _id:
-            cache[_id] = res
+            cache[_id] = bool(res)

         if not res:
             return False
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 4654994d2d..753b6469e2 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -22,6 +22,7 @@ import simplejson as json
 import re

 from synapse.types import UserID
+from synapse.util.caches.lrucache import LruCache

 logger = logging.getLogger(__name__)

@@ -277,18 +278,18 @@ def _glob_matches(glob, value, word_boundary=False):
         )
         if word_boundary:
             r = r"\b%s\b" % (r,)
-            r = re.compile(r, flags=re.IGNORECASE)
+            r = _compile_regex(r)
             return r.search(value)
         else:
             r = r + "$"
-            r = re.compile(r, flags=re.IGNORECASE)
+            r = _compile_regex(r)
             return r.match(value)
     elif word_boundary:
         r = re.escape(glob)
         r = r"\b%s\b" % (r,)
-        r = re.compile(r, flags=re.IGNORECASE)
+        r = _compile_regex(r)
         return r.search(value)
     else:
@@ -306,3 +307,16 @@ def _flatten_dict(d, prefix=[], result={}):
             _flatten_dict(value, prefix=(prefix+[key]), result=result)

     return result
+
+
+regex_cache = LruCache(100000)
+
+
+def _compile_regex(regex_str):
+    r = regex_cache.get(regex_str, None)
+    if r:
+        return r
+
+    r = re.compile(regex_str, flags=re.IGNORECASE)
+    regex_cache[regex_str] = r
+    return r
diff --git a/synapse/server.py b/synapse/server.py
index ffd4f936d0..63f9059837 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -96,6 +96,7 @@ class BaseHomeServer(object):
             hostname : The hostname for the server.
         """
         self.hostname = hostname
+        self.hostname_with_colon = ":" + hostname
         self._building = {}

         # Other kwargs are explicit dependencies
@@ -139,6 +140,9 @@ class BaseHomeServer(object):
     def is_mine(self, domain_specific_string):
         return domain_specific_string.domain == self.hostname

+    def is_mine_id(self, string):
+        return string.endswith(self.hostname_with_colon)
+
 # Build magic accessors for every dependency
 for depname in BaseHomeServer.DEPENDENCIES:
     BaseHomeServer._make_dependency_method(depname)
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 21cf88b3da..c80e576620 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -14,7 +14,7 @@
 # limitations under the License.

 from ._base import SQLBaseStore
-from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
+from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached
 from synapse.util.caches import cache_counter, caches_by_name

 from twisted.internet import defer
@@ -33,6 +33,18 @@ class ReceiptsStore(SQLBaseStore):

         self._receipts_stream_cache = _RoomStreamChangeCache()

+    @cached(num_args=2)
+    def get_receipts_for_room(self, room_id, receipt_type):
+        return self._simple_select_list(
+            table="receipts_linearized",
+            keyvalues={
+                "room_id": room_id,
+                "receipt_type": receipt_type,
+            },
+            retcols=("user_id", "event_id"),
+            desc="get_receipts_for_room",
+        )
+
     @defer.inlineCallbacks
     def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None):
         """Get receipts for multiple rooms for sending to clients.
--
cgit 1.4.1

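The regex cache in the patch above amounts to memoising re.compile for
glob-derived push-rule patterns. A minimal standalone sketch of the same
idea, with functools.lru_cache (Python 3) standing in for synapse's own
LruCache, so the decorator here is illustrative rather than the patch's API:

    import re
    from functools import lru_cache

    @lru_cache(maxsize=100000)
    def compile_push_rule_regex(regex_str):
        # Push rules evaluate the same glob-derived patterns for every event,
        # so caching the compiled object avoids recompiling on each match.
        return re.compile(regex_str, flags=re.IGNORECASE)

The second half of the commit narrows bulk evaluation to local users who
have sent read receipts, using the new is_mine_id check (a user id such as
"@alice:example.com" ends with ":" + the server's hostname).
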
""" self.hostname = hostname + self.hostname_with_colon = ":" + hostname self._building = {} # Other kwargs are explicit dependencies @@ -139,6 +140,9 @@ class BaseHomeServer(object): def is_mine(self, domain_specific_string): return domain_specific_string.domain == self.hostname + def is_mine_id(self, string): + return string.endswith(self.hostname_with_colon) + # Build magic accessors for every dependency for depname in BaseHomeServer.DEPENDENCIES: BaseHomeServer._make_dependency_method(depname) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 21cf88b3da..c80e576620 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -14,7 +14,7 @@ # limitations under the License. from ._base import SQLBaseStore -from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList +from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached from synapse.util.caches import cache_counter, caches_by_name from twisted.internet import defer @@ -33,6 +33,18 @@ class ReceiptsStore(SQLBaseStore): self._receipts_stream_cache = _RoomStreamChangeCache() + @cached(num_args=2) + def get_receipts_for_room(self, room_id, receipt_type): + return self._simple_select_list( + table="receipts_linearized", + keyvalues={ + "room_id": room_id, + "receipt_type": receipt_type, + }, + retcols=("user_id", "event_id"), + desc="get_receipts_for_room", + ) + @defer.inlineCallbacks def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): """Get receipts for multiple rooms for sending to clients. -- cgit 1.4.1 From 3fa344c0376045d1be396701978ba46c552065b6 Mon Sep 17 00:00:00 2001 From: David Baker Date: Wed, 20 Jan 2016 15:30:31 +0000 Subject: Add storage function to get all receipts for a user. Also add some cache invalidation to the receipts storage because there wasn't any, and remove a method that was unused. --- synapse/push/__init__.py | 28 +++++------------------ synapse/storage/receipts.py | 55 ++++++++++++++++++++++++++++----------------- 2 files changed, 40 insertions(+), 43 deletions(-) (limited to 'synapse/storage/receipts.py') diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index ffae02a285..9a4af2b3ca 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -16,9 +16,8 @@ from twisted.internet import defer from synapse.streams.config import PaginationConfig -from synapse.types import StreamToken, UserID +from synapse.types import StreamToken from synapse.api.constants import Membership -from synapse.api.filtering import FilterCollection import synapse.util.async import push_rule_evaluator as push_rule_evaluator @@ -290,22 +289,9 @@ class Pusher(object): membership_list=(Membership.INVITE, Membership.JOIN) ) - user_is_guest = yield self.store.is_guest(self.user_id) - - # XXX: importing inside method to break circular dependency. - # should sort out the mess by moving all this logic out of - # push/__init__.py and probably moving the logic we use from the sync - # handler to somewhere more amenable to re-use. 
From 367cfab4e633c892e0d662e3abcd8e1a9c7f2daf Mon Sep 17 00:00:00 2001
From: David Baker
Date: Wed, 20 Jan 2016 16:05:09 +0000
Subject: peppate

---
 synapse/storage/receipts.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 018140f47a..c4232bdc65 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -57,7 +57,7 @@ class ReceiptsStore(SQLBaseStore):
             return txn.fetchall()

         defer.returnValue(dict(
-           (yield self.runInteraction("get_receipts_for_user", f))
+            (yield self.runInteraction("get_receipts_for_user", f))
         ))

@@ -212,7 +212,7 @@ class ReceiptsStore(SQLBaseStore):
     def insert_linearized_receipt_txn(self, txn, room_id, receipt_type,
                                       user_id, event_id, data, stream_id):
         txn.call_after(
-           self.get_receipts_for_room.invalidate, (room_id, receipt_type)
+            self.get_receipts_for_room.invalidate, (room_id, receipt_type)
         )
         txn.call_after(
             self.get_receipts_for_user.invalidate, (user_id, receipt_type)
@@ -348,7 +348,6 @@ class ReceiptsStore(SQLBaseStore):
         # FIXME: This shouldn't invalidate the whole cache
         txn.call_after(self.get_linearized_receipts_for_room.invalidate_all)

-
         self._simple_delete_txn(
             txn,
             table="receipts_graph",
--
cgit 1.4.1

- """ - rows = yield self._simple_select_list( - table="receipts_graph", - keyvalues={"room_id": room_id}, - retcols=["receipt_type", "user_id", "event_id"], - desc="get_linearized_receipts_for_room", - ) - - result = {} - for row in rows: - result.setdefault( - row["user_id"], {} - ).setdefault( - row["receipt_type"], [] - ).append(row["event_id"]) - - defer.returnValue(result) - def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): + txn.call_after( + self.get_receipts_for_room.invalidate, (room_id, receipt_type) + ) + txn.call_after( + self.get_receipts_for_user.invalidate, (user_id, receipt_type) + ) + # FIXME: This shouldn't invalidate the whole cache + txn.call_after(self.get_linearized_receipts_for_room.invalidate_all) # We don't want to clobber receipts for more recent events, so we # have to compare orderings of existing receipts @@ -324,6 +326,7 @@ class ReceiptsStore(SQLBaseStore): ) max_persisted_id = yield self._stream_id_gen.get_max_token(self) + defer.returnValue((stream_id, max_persisted_id)) def insert_graph_receipt(self, room_id, receipt_type, user_id, event_ids, @@ -336,6 +339,16 @@ class ReceiptsStore(SQLBaseStore): def insert_graph_receipt_txn(self, txn, room_id, receipt_type, user_id, event_ids, data): + txn.call_after( + self.get_receipts_for_room.invalidate, (room_id, receipt_type) + ) + txn.call_after( + self.get_receipts_for_user.invalidate, (user_id, receipt_type) + ) + # FIXME: This shouldn't invalidate the whole cache + txn.call_after(self.get_linearized_receipts_for_room.invalidate_all) + + self._simple_delete_txn( txn, table="receipts_graph", -- cgit 1.4.1 From 367cfab4e633c892e0d662e3abcd8e1a9c7f2daf Mon Sep 17 00:00:00 2001 From: David Baker Date: Wed, 20 Jan 2016 16:05:09 +0000 Subject: peppate --- synapse/storage/receipts.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'synapse/storage/receipts.py') diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 018140f47a..c4232bdc65 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -57,7 +57,7 @@ class ReceiptsStore(SQLBaseStore): return txn.fetchall() defer.returnValue(dict( - (yield self.runInteraction("get_receipts_for_user", f)) + (yield self.runInteraction("get_receipts_for_user", f)) )) @defer.inlineCallbacks @@ -212,7 +212,7 @@ class ReceiptsStore(SQLBaseStore): def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( - self.get_receipts_for_room.invalidate, (room_id, receipt_type) + self.get_receipts_for_room.invalidate, (room_id, receipt_type) ) txn.call_after( self.get_receipts_for_user.invalidate, (user_id, receipt_type) @@ -348,7 +348,6 @@ class ReceiptsStore(SQLBaseStore): # FIXME: This shouldn't invalidate the whole cache txn.call_after(self.get_linearized_receipts_for_room.invalidate_all) - self._simple_delete_txn( txn, table="receipts_graph", -- cgit 1.4.1 From 87f9477b105b4e8216d1df186492ec6d9872967f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 26 Jan 2016 15:51:06 +0000 Subject: Add a Homeserver.setup method. This is for setting up dependencies that require work on startup. This is useful for the DataStore that wants to read a bunch from the database before initiliazing. 
--- synapse/app/homeserver.py | 33 ++++++++++++++--------- synapse/server.py | 32 ++++++++++++----------- synapse/storage/__init__.py | 45 +++++++++++++++++++++++++++++--- synapse/storage/_base.py | 49 ++++++++++++++++------------------- synapse/storage/events.py | 14 ++++------ synapse/storage/receipts.py | 8 +++--- synapse/storage/stream.py | 13 ---------- synapse/storage/tags.py | 7 ----- synapse/storage/util/id_generators.py | 36 +++++++------------------ 9 files changed, 121 insertions(+), 116 deletions(-) (limited to 'synapse/storage/receipts.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 795c655ae3..fb76be58a2 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -254,6 +254,17 @@ class SynapseHomeServer(HomeServer): except IncorrectDatabaseSetup as e: quit_with_error(e.message) + def get_db_conn(self): + db_conn = self.database_engine.module.connect( + **{ + k: v for k, v in self.db_config.get("args", {}).items() + if not k.startswith("cp_") + } + ) + + self.database_engine.on_new_connection(db_conn) + return db_conn + def quit_with_error(error_string): message_lines = error_string.split("\n") @@ -390,13 +401,7 @@ def setup(config_options): logger.info("Preparing database: %s...", config.database_config['name']) try: - db_conn = database_engine.module.connect( - **{ - k: v for k, v in config.database_config.get("args", {}).items() - if not k.startswith("cp_") - } - ) - + db_conn = hs.get_db_conn() database_engine.prepare_database(db_conn) hs.run_startup_checks(db_conn, database_engine) @@ -411,13 +416,17 @@ def setup(config_options): logger.info("Database prepared in %s.", config.database_config['name']) + hs.setup() hs.start_listening() - hs.get_pusherpool().start() - hs.get_state_handler().start_caching() - hs.get_datastore().start_profiling() - hs.get_datastore().start_doing_background_updates() - hs.get_replication_layer().start_get_pdu_cache() + def start(): + hs.get_pusherpool().start() + hs.get_state_handler().start_caching() + hs.get_datastore().start_profiling() + hs.get_datastore().start_doing_background_updates() + hs.get_replication_layer().start_get_pdu_cache() + + reactor.callWhenRunning(start) return hs diff --git a/synapse/server.py b/synapse/server.py index a59e46ca2d..006e91b37c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -21,6 +21,7 @@ # Imports required for the default HomeServer() implementation from twisted.web.client import BrowserLikePolicyForHTTPS from twisted.enterprise import adbapi +from twisted.internet import defer from synapse.federation import initialize_http_replication from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory @@ -28,7 +29,7 @@ from synapse.notifier import Notifier from synapse.api.auth import Auth from synapse.handlers import Handlers from synapse.state import StateHandler -from synapse.storage import DataStore +from synapse.storage import get_datastore from synapse.util import Clock from synapse.util.distributor import Distributor from synapse.streams.events import EventSources @@ -40,6 +41,11 @@ from synapse.api.filtering import Filtering from synapse.http.matrixfederationclient import MatrixFederationHttpClient +import logging + + +logger = logging.getLogger(__name__) + class HomeServer(object): """A basic homeserver object without lazy component builders. 
@@ -102,10 +108,19 @@ class HomeServer(object): self.hostname = hostname self._building = {} + self.clock = Clock() + self.distributor = Distributor() + self.ratelimiter = Ratelimiter() + # Other kwargs are explicit dependencies for depname in kwargs: setattr(self, depname, kwargs[depname]) + def setup(self): + logger.info("Setting up.") + self.datastore = get_datastore(self) + logger.info("Finished setting up.") + def get_ip_from_request(self, request): # X-Forwarded-For is handled by our custom request type. return request.getClientIP() @@ -116,15 +131,9 @@ class HomeServer(object): def is_mine_id(self, string): return string.split(":", 1)[1] == self.hostname - def build_clock(self): - return Clock() - def build_replication_layer(self): return initialize_http_replication(self) - def build_datastore(self): - return DataStore(self) - def build_handlers(self): return Handlers(self) @@ -135,10 +144,9 @@ class HomeServer(object): return Auth(self) def build_http_client_context_factory(self): - config = self.get_config() return ( InsecureInterceptableContextFactory() - if config.use_insecure_ssl_client_just_for_testing_do_not_use + if self.config.use_insecure_ssl_client_just_for_testing_do_not_use else BrowserLikePolicyForHTTPS() ) @@ -157,15 +165,9 @@ class HomeServer(object): def build_state_handler(self): return StateHandler(self) - def build_distributor(self): - return Distributor() - def build_event_sources(self): return EventSources(self) - def build_ratelimiter(self): - return Ratelimiter() - def build_keyring(self): return Keyring(self) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 7a3f6c4662..c8cab45f77 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -46,6 +46,9 @@ from .tags import TagsStore from .account_data import AccountDataStore +from util.id_generators import IdGenerator, StreamIdGenerator + + import logging @@ -58,6 +61,22 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120*1000 +def get_datastore(hs): + logger.info("getting called!") + + conn = hs.get_db_conn() + try: + cur = conn.cursor() + cur.execute("SELECT MIN(stream_ordering) FROM events",) + rows = cur.fetchall() + min_token = rows[0][0] if rows and rows[0] and rows[0][0] else -1 + min_token = min(min_token, -1) + + return DataStore(conn, hs, min_token) + finally: + conn.close() + + class DataStore(RoomMemberStore, RoomStore, RegistrationStore, StreamStore, ProfileStore, PresenceStore, TransactionStore, @@ -79,18 +98,36 @@ class DataStore(RoomMemberStore, RoomStore, EventPushActionsStore ): - def __init__(self, hs): - super(DataStore, self).__init__(hs) + def __init__(self, db_conn, hs, min_stream_token): self.hs = hs - self.min_token_deferred = self._get_min_token() - self.min_token = None + self.min_stream_token = min_stream_token self.client_ip_last_seen = Cache( name="client_ip_last_seen", keylen=4, ) + self._stream_id_gen = StreamIdGenerator( + db_conn, "events", "stream_ordering" + ) + self._receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + self._account_data_id_gen = StreamIdGenerator( + db_conn, "account_data_max_stream_id", "stream_id" + ) + + self._transaction_id_gen = IdGenerator("sent_transactions", "id", self) + self._state_groups_id_gen = IdGenerator("state_groups", "id", self) + self._access_tokens_id_gen = IdGenerator("access_tokens", "id", self) + self._refresh_tokens_id_gen = IdGenerator("refresh_tokens", "id", self) + self._pushers_id_gen = IdGenerator("pushers", "id", self) + 
self._push_rule_id_gen = IdGenerator("push_rules", "id", self) + self._push_rules_enable_id_gen = IdGenerator("push_rules_enable", "id", self) + + super(DataStore, self).__init__(hs) + @defer.inlineCallbacks def insert_client_ip(self, user, access_token, ip, user_agent): now = int(self._clock.time_msec()) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 90d7aee94a..5e77320540 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -15,13 +15,11 @@ import logging from synapse.api.errors import StoreError -from synapse.util.logutils import log_function from synapse.util.logcontext import preserve_context_over_fn, LoggingContext from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.caches.descriptors import Cache import synapse.metrics -from util.id_generators import IdGenerator, StreamIdGenerator from twisted.internet import defer @@ -175,16 +173,6 @@ class SQLBaseStore(object): self.database_engine = hs.database_engine - self._stream_id_gen = StreamIdGenerator("events", "stream_ordering") - self._transaction_id_gen = IdGenerator("sent_transactions", "id", self) - self._state_groups_id_gen = IdGenerator("state_groups", "id", self) - self._access_tokens_id_gen = IdGenerator("access_tokens", "id", self) - self._refresh_tokens_id_gen = IdGenerator("refresh_tokens", "id", self) - self._pushers_id_gen = IdGenerator("pushers", "id", self) - self._push_rule_id_gen = IdGenerator("push_rules", "id", self) - self._push_rules_enable_id_gen = IdGenerator("push_rules_enable", "id", self) - self._receipts_id_gen = StreamIdGenerator("receipts_linearized", "stream_id") - def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -345,7 +333,8 @@ class SQLBaseStore(object): defer.returnValue(result) - def cursor_to_dict(self, cursor): + @staticmethod + def cursor_to_dict(cursor): """Converts a SQL cursor into an list of dicts. Args: @@ -402,8 +391,8 @@ class SQLBaseStore(object): if not or_ignore: raise - @log_function - def _simple_insert_txn(self, txn, table, values): + @staticmethod + def _simple_insert_txn(txn, table, values): keys, vals = zip(*values.items()) sql = "INSERT INTO %s (%s) VALUES(%s)" % ( @@ -414,7 +403,8 @@ class SQLBaseStore(object): txn.execute(sql, vals) - def _simple_insert_many_txn(self, txn, table, values): + @staticmethod + def _simple_insert_many_txn(txn, table, values): if not values: return @@ -537,9 +527,10 @@ class SQLBaseStore(object): table, keyvalues, retcol, allow_none=allow_none, ) - def _simple_select_one_onecol_txn(self, txn, table, keyvalues, retcol, + @classmethod + def _simple_select_one_onecol_txn(cls, txn, table, keyvalues, retcol, allow_none=False): - ret = self._simple_select_onecol_txn( + ret = cls._simple_select_onecol_txn( txn, table=table, keyvalues=keyvalues, @@ -554,7 +545,8 @@ class SQLBaseStore(object): else: raise StoreError(404, "No row found") - def _simple_select_onecol_txn(self, txn, table, keyvalues, retcol): + @staticmethod + def _simple_select_onecol_txn(txn, table, keyvalues, retcol): sql = ( "SELECT %(retcol)s FROM %(table)s WHERE %(where)s" ) % { @@ -603,7 +595,8 @@ class SQLBaseStore(object): table, keyvalues, retcols ) - def _simple_select_list_txn(self, txn, table, keyvalues, retcols): + @classmethod + def _simple_select_list_txn(cls, txn, table, keyvalues, retcols): """Executes a SELECT query on the named table, which may return zero or more rows, returning the result as a list of dicts. 
@@ -627,7 +620,7 @@ class SQLBaseStore(object): ) txn.execute(sql) - return self.cursor_to_dict(txn) + return cls.cursor_to_dict(txn) @defer.inlineCallbacks def _simple_select_many_batch(self, table, column, iterable, retcols, @@ -662,7 +655,8 @@ class SQLBaseStore(object): defer.returnValue(results) - def _simple_select_many_txn(self, txn, table, column, iterable, keyvalues, retcols): + @classmethod + def _simple_select_many_txn(cls, txn, table, column, iterable, keyvalues, retcols): """Executes a SELECT query on the named table, which may return zero or more rows, returning the result as a list of dicts. @@ -699,7 +693,7 @@ class SQLBaseStore(object): ) txn.execute(sql, values) - return self.cursor_to_dict(txn) + return cls.cursor_to_dict(txn) def _simple_update_one(self, table, keyvalues, updatevalues, desc="_simple_update_one"): @@ -726,7 +720,8 @@ class SQLBaseStore(object): table, keyvalues, updatevalues, ) - def _simple_update_one_txn(self, txn, table, keyvalues, updatevalues): + @staticmethod + def _simple_update_one_txn(txn, table, keyvalues, updatevalues): update_sql = "UPDATE %s SET %s WHERE %s" % ( table, ", ".join("%s = ?" % (k,) for k in updatevalues), @@ -743,7 +738,8 @@ class SQLBaseStore(object): if txn.rowcount > 1: raise StoreError(500, "More than one row matched") - def _simple_select_one_txn(self, txn, table, keyvalues, retcols, + @staticmethod + def _simple_select_one_txn(txn, table, keyvalues, retcols, allow_none=False): select_sql = "SELECT %s FROM %s WHERE %s" % ( ", ".join(retcols), @@ -784,7 +780,8 @@ class SQLBaseStore(object): raise StoreError(500, "more than one row matched") return self.runInteraction(desc, func) - def _simple_delete_txn(self, txn, table, keyvalues): + @staticmethod + def _simple_delete_txn(txn, table, keyvalues): sql = "DELETE FROM %s WHERE %s" % ( table, " AND ".join("%s = ?" 
% (k, ) for k in keyvalues) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ba368a3eca..298cb9bada 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -66,11 +66,9 @@ class EventsStore(SQLBaseStore): return if backfilled: - if not self.min_token_deferred.called: - yield self.min_token_deferred - start = self.min_token - 1 - self.min_token -= len(events_and_contexts) + 1 - stream_orderings = range(start, self.min_token, -1) + start = self.min_stream_token - 1 + self.min_stream_token -= len(events_and_contexts) + 1 + stream_orderings = range(start, self.min_stream_token, -1) @contextmanager def stream_ordering_manager(): @@ -107,10 +105,8 @@ class EventsStore(SQLBaseStore): is_new_state=True, current_state=None): stream_ordering = None if backfilled: - if not self.min_token_deferred.called: - yield self.min_token_deferred - self.min_token -= 1 - stream_ordering = self.min_token + self.min_stream_token -= 1 + stream_ordering = self.min_stream_token if stream_ordering is None: stream_ordering_manager = yield self._stream_id_gen.get_next(self) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index c4232bdc65..c0593e23ee 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -31,7 +31,9 @@ class ReceiptsStore(SQLBaseStore): def __init__(self, hs): super(ReceiptsStore, self).__init__(hs) - self._receipts_stream_cache = _RoomStreamChangeCache() + self._receipts_stream_cache = _RoomStreamChangeCache( + self._receipts_id_gen.get_max_token(None) + ) @cached(num_args=2) def get_receipts_for_room(self, room_id, receipt_type): @@ -377,11 +379,11 @@ class _RoomStreamChangeCache(object): may have changed since that key. If the key is too old then the cache will simply return all rooms. 
""" - def __init__(self, size_of_cache=10000): + def __init__(self, current_key, size_of_cache=10000): self._size_of_cache = size_of_cache self._room_to_key = {} self._cache = sorteddict() - self._earliest_key = None + self._earliest_key = current_key self.name = "ReceiptsRoomChangeCache" caches_by_name[self.name] = self._cache diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 02b1913e26..e31bad258a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -444,19 +444,6 @@ class StreamStore(SQLBaseStore): rows = txn.fetchall() return rows[0][0] if rows else 0 - @defer.inlineCallbacks - def _get_min_token(self): - row = yield self._execute( - "_get_min_token", None, "SELECT MIN(stream_ordering) FROM events" - ) - - self.min_token = row[0][0] if row and row[0] and row[0][0] else -1 - self.min_token = min(self.min_token, -1) - - logger.debug("min_token is: %s", self.min_token) - - defer.returnValue(self.min_token) - @staticmethod def _set_before_and_after(events, rows): for event, row in zip(events, rows): diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index ed9c91e5ea..4c39e07cbd 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -16,7 +16,6 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached from twisted.internet import defer -from .util.id_generators import StreamIdGenerator import ujson as json import logging @@ -25,12 +24,6 @@ logger = logging.getLogger(__name__) class TagsStore(SQLBaseStore): - def __init__(self, hs): - super(TagsStore, self).__init__(hs) - - self._account_data_id_gen = StreamIdGenerator( - "account_data_max_stream_id", "stream_id" - ) def get_max_account_data_stream_id(self): """Get the current max stream id for the private user data stream diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index f58bf7fd2c..5c522f4ab9 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -72,28 +72,24 @@ class StreamIdGenerator(object): with stream_id_gen.get_next_txn(txn) as stream_id: # ... persist event ... """ - def __init__(self, table, column): + def __init__(self, db_conn, table, column): self.table = table self.column = column self._lock = threading.Lock() - self._current_max = None + cur = db_conn.cursor() + self._current_max = self._get_or_compute_current_max(cur) + cur.close() + self._unfinished_ids = deque() - @defer.inlineCallbacks def get_next(self, store): """ Usage: with yield stream_id_gen.get_next as stream_id: # ... persist event ... """ - if not self._current_max: - yield store.runInteraction( - "_compute_current_max", - self._get_or_compute_current_max, - ) - with self._lock: self._current_max += 1 next_id = self._current_max @@ -108,21 +104,14 @@ class StreamIdGenerator(object): with self._lock: self._unfinished_ids.remove(next_id) - defer.returnValue(manager()) + return manager() - @defer.inlineCallbacks def get_next_mult(self, store, n): """ Usage: with yield stream_id_gen.get_next(store, n) as stream_ids: # ... persist events ... 
""" - if not self._current_max: - yield store.runInteraction( - "_compute_current_max", - self._get_or_compute_current_max, - ) - with self._lock: next_ids = range(self._current_max + 1, self._current_max + n + 1) self._current_max += n @@ -139,24 +128,17 @@ class StreamIdGenerator(object): for next_id in next_ids: self._unfinished_ids.remove(next_id) - defer.returnValue(manager()) + return manager() - @defer.inlineCallbacks def get_max_token(self, store): """Returns the maximum stream id such that all stream ids less than or equal to it have been successfully persisted. """ - if not self._current_max: - yield store.runInteraction( - "_compute_current_max", - self._get_or_compute_current_max, - ) - with self._lock: if self._unfinished_ids: - defer.returnValue(self._unfinished_ids[0] - 1) + return self._unfinished_ids[0] - 1 - defer.returnValue(self._current_max) + return self._current_max def _get_or_compute_current_max(self, txn): with self._lock: -- cgit 1.4.1 From b97f6626b6f9b91498d06a7ae113b9d20f1fc2ef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Jan 2016 09:54:30 +0000 Subject: Add cache to room stream --- synapse/handlers/sync.py | 42 +++++++--- synapse/storage/events.py | 2 + synapse/storage/receipts.py | 65 +-------------- synapse/storage/stream.py | 133 +++++++++++++++++++++++++++++++ synapse/util/caches/room_change_cache.py | 86 ++++++++++++++++++++ 5 files changed, 254 insertions(+), 74 deletions(-) create mode 100644 synapse/util/caches/room_change_cache.py (limited to 'synapse/storage/receipts.py') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 328c049b03..1fdf978313 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -514,13 +514,6 @@ class SyncHandler(BaseHandler): timeline_limit = sync_config.filter_collection.timeline_limit() - room_events, _ = yield self.store.get_room_events_stream( - sync_config.user.to_string(), - from_key=since_token.room_key, - to_key=now_token.room_key, - limit=timeline_limit + 1, - ) - tags_by_room = yield self.store.get_updated_tags( sync_config.user.to_string(), since_token.account_data_key, @@ -533,6 +526,32 @@ class SyncHandler(BaseHandler): ) ) + rooms_changed = yield self.store.get_room_changes_for_user( + sync_config.user.to_string(), since_token.room_key, now_token.room_key + ) + + room_to_events = yield self.store.get_room_events_stream_for_rooms( + room_ids=room_ids, + from_key=since_token.room_key, + to_key=now_token.room_key, + limit=timeline_limit + 1, + ) + + room_events = [ + event + for events, _ in room_to_events.values() + for event in events + ] + + room_events.extend(rooms_changed) + + # room_events, _ = yield self.store.get_room_events_stream( + # sync_config.user.to_string(), + # from_key=since_token.room_key, + # to_key=now_token.room_key, + # limit=timeline_limit + 1, + # ) + joined = [] archived = [] if len(room_events) <= timeline_limit: @@ -694,14 +713,12 @@ class SyncHandler(BaseHandler): end_key = room_key while limited and len(recents) < timeline_limit and max_repeat: - events, keys = yield self.store.get_recent_events_for_room( + events, end_key = yield self.store.get_recent_room_events_stream_for_room( room_id, limit=load_limit + 1, - from_token=since_token.room_key if since_token else None, - end_token=end_key, + from_key=since_token.room_key if since_token else None, + to_key=end_key, ) - room_key, _ = keys - end_key = "s" + room_key.split('-')[-1] loaded_recents = sync_config.filter_collection.filter_room_timeline(events) loaded_recents = yield 
From b97f6626b6f9b91498d06a7ae113b9d20f1fc2ef Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 27 Jan 2016 09:54:30 +0000
Subject: Add cache to room stream

---
 synapse/handlers/sync.py                 |  42 +++++++---
 synapse/storage/events.py                |   2 +
 synapse/storage/receipts.py              |  65 +--------------
 synapse/storage/stream.py                | 133 +++++++++++++++++++++++++++++++
 synapse/util/caches/room_change_cache.py |  86 ++++++++++++++++++++
 5 files changed, 254 insertions(+), 74 deletions(-)
 create mode 100644 synapse/util/caches/room_change_cache.py

(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 328c049b03..1fdf978313 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -514,13 +514,6 @@ class SyncHandler(BaseHandler):

         timeline_limit = sync_config.filter_collection.timeline_limit()

-        room_events, _ = yield self.store.get_room_events_stream(
-            sync_config.user.to_string(),
-            from_key=since_token.room_key,
-            to_key=now_token.room_key,
-            limit=timeline_limit + 1,
-        )
-
         tags_by_room = yield self.store.get_updated_tags(
             sync_config.user.to_string(),
             since_token.account_data_key,
@@ -533,6 +526,32 @@ class SyncHandler(BaseHandler):
             )
         )

+        rooms_changed = yield self.store.get_room_changes_for_user(
+            sync_config.user.to_string(), since_token.room_key, now_token.room_key
+        )
+
+        room_to_events = yield self.store.get_room_events_stream_for_rooms(
+            room_ids=room_ids,
+            from_key=since_token.room_key,
+            to_key=now_token.room_key,
+            limit=timeline_limit + 1,
+        )
+
+        room_events = [
+            event
+            for events, _ in room_to_events.values()
+            for event in events
+        ]
+
+        room_events.extend(rooms_changed)
+
+        # room_events, _ = yield self.store.get_room_events_stream(
+        #     sync_config.user.to_string(),
+        #     from_key=since_token.room_key,
+        #     to_key=now_token.room_key,
+        #     limit=timeline_limit + 1,
+        # )
+
         joined = []
         archived = []
         if len(room_events) <= timeline_limit:
@@ -694,14 +713,12 @@ class SyncHandler(BaseHandler):
         end_key = room_key

         while limited and len(recents) < timeline_limit and max_repeat:
-            events, keys = yield self.store.get_recent_events_for_room(
+            events, end_key = yield self.store.get_recent_room_events_stream_for_room(
                 room_id,
                 limit=load_limit + 1,
-                from_token=since_token.room_key if since_token else None,
-                end_token=end_key,
+                from_key=since_token.room_key if since_token else None,
+                to_key=end_key,
             )
-            room_key, _ = keys
-            end_key = "s" + room_key.split('-')[-1]
             loaded_recents = sync_config.filter_collection.filter_room_timeline(events)
             loaded_recents = yield self._filter_events_for_client(
                 sync_config.user.to_string(),
@@ -712,6 +729,7 @@ class SyncHandler(BaseHandler):
             recents = loaded_recents
             if len(events) <= load_limit:
                 limited = False
+                break
             max_repeat -= 1

         if len(recents) > timeline_limit:
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 298cb9bada..d96ea3a30e 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -128,6 +128,8 @@ class EventsStore(SQLBaseStore):
                 is_new_state=is_new_state,
                 current_state=current_state,
             )
+            logger.info("Invalidating %r at %r", event.room_id, stream_ordering)
+            self._events_stream_cache.room_has_changed(None, event.room_id, stream_ordering)
         except _RollbackButIsFineException:
             pass
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index c0593e23ee..b7a4e77748 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -16,6 +16,7 @@
 from ._base import SQLBaseStore
 from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached
 from synapse.util.caches import cache_counter, caches_by_name
+from synapse.util.caches.room_change_cache import RoomStreamChangeCache

 from twisted.internet import defer
@@ -31,8 +32,8 @@ class ReceiptsStore(SQLBaseStore):
     def __init__(self, hs):
         super(ReceiptsStore, self).__init__(hs)

-        self._receipts_stream_cache = _RoomStreamChangeCache(
-            self._receipts_id_gen.get_max_token(None)
+        self._receipts_stream_cache = RoomStreamChangeCache(
+            "ReceiptsRoomChangeCache", self._receipts_id_gen.get_max_token(None)
         )

     @cached(num_args=2)
@@ -370,63 +371,3 @@ class ReceiptsStore(SQLBaseStore):
                 "data": json.dumps(data),
             }
         )
-
-
-class _RoomStreamChangeCache(object):
-    """Keeps track of the stream_id of the latest change in rooms.
-
-    Given a list of rooms and stream key, it will give a subset of rooms that
-    may have changed since that key. If the key is too old then the cache
-    will simply return all rooms.
-    """
-    def __init__(self, current_key, size_of_cache=10000):
-        self._size_of_cache = size_of_cache
-        self._room_to_key = {}
-        self._cache = sorteddict()
-        self._earliest_key = current_key
-        self.name = "ReceiptsRoomChangeCache"
-        caches_by_name[self.name] = self._cache
-
-    @defer.inlineCallbacks
-    def get_rooms_changed(self, store, room_ids, key):
-        """Returns subset of room ids that have had new receipts since the
-        given key. If the key is too old it will just return the given list.
-        """
-        if key > (yield self._get_earliest_key(store)):
-            keys = self._cache.keys()
-            i = keys.bisect_right(key)
-
-            result = set(
-                self._cache[k] for k in keys[i:]
-            ).intersection(room_ids)
-
-            cache_counter.inc_hits(self.name)
-        else:
-            result = room_ids
-            cache_counter.inc_misses(self.name)
-
-        defer.returnValue(result)
-
-    @defer.inlineCallbacks
-    def room_has_changed(self, store, room_id, key):
-        """Informs the cache that the room has been changed at the given key.
-        """
-        if key > (yield self._get_earliest_key(store)):
-            old_key = self._room_to_key.get(room_id, None)
-            if old_key:
-                key = max(key, old_key)
-                self._cache.pop(old_key, None)
-            self._cache[key] = room_id
-
-            while len(self._cache) > self._size_of_cache:
-                k, r = self._cache.popitem()
-                self._earliest_key = max(k, self._earliest_key)
-                self._room_to_key.pop(r, None)
-
-    @defer.inlineCallbacks
-    def _get_earliest_key(self, store):
-        if self._earliest_key is None:
-            self._earliest_key = yield store.get_max_receipt_stream_id()
-            self._earliest_key = int(self._earliest_key)
-
-        defer.returnValue(self._earliest_key)
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index e31bad258a..3a32a0019a 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -37,6 +37,7 @@
 from twisted.internet import defer

 from ._base import SQLBaseStore
 from synapse.util.caches.descriptors import cachedInlineCallbacks
+from synapse.util.caches.room_change_cache import RoomStreamChangeCache
 from synapse.api.constants import EventTypes
 from synapse.types import RoomStreamToken
 from synapse.util.logutils import log_function
@@ -77,6 +78,12 @@ def upper_bound(token):


 class StreamStore(SQLBaseStore):
+    def __init__(self, hs):
+        super(StreamStore, self).__init__(hs)
+
+        self._events_stream_cache = RoomStreamChangeCache(
+            "EventsRoomStreamChangeCache", self._stream_id_gen.get_max_token(None)
+        )

     @defer.inlineCallbacks
     def get_appservice_room_stream(self, service, from_key, to_key, limit=0):
@@ -157,6 +164,132 @@ class StreamStore(SQLBaseStore):
         results = yield self.runInteraction("get_appservice_room_stream", f)
         defer.returnValue(results)

+    @defer.inlineCallbacks
+    def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0):
+        from_id = RoomStreamToken.parse_stream_token(from_key).stream
+
+        room_ids = yield self._events_stream_cache.get_rooms_changed(
+            self, room_ids, from_id
+        )
+
+        if not room_ids:
+            defer.returnValue({})
+
+        results = {}
+        room_ids = list(room_ids)
+        for rm_ids in (room_ids[i:i+20] for i in xrange(0, len(room_ids), 20)):
+            res = yield defer.gatherResults([
+                self.get_recent_room_events_stream_for_room(
+                    room_id, from_key, to_key, limit
+                ).addCallback(lambda r, rm: (rm, r), room_id)
+                for room_id in room_ids
+            ])
+            results.update(dict(res))
+
+        defer.returnValue(results)
+
+    @defer.inlineCallbacks
+    def get_recent_room_events_stream_for_room(self, room_id, from_key, to_key, limit=0):
+        if from_key is not None:
+            from_id = RoomStreamToken.parse_stream_token(from_key).stream
+        else:
+            from_id = None
+        to_id = RoomStreamToken.parse_stream_token(to_key).stream
+
+        if from_key == to_key:
+            defer.returnValue(([], from_key))
+
+        has_changed = yield self._events_stream_cache.get_room_has_changed(
+            room_id, from_id
+        )
+
+        if not has_changed:
+            defer.returnValue(([], from_key))
+
+        def f(txn):
+            if from_id is not None:
+                sql = (
+                    "SELECT event_id, stream_ordering FROM events WHERE"
+                    " room_id = ?"
+                    " AND not outlier"
+                    " AND stream_ordering > ? AND stream_ordering <= ?"
+                    " ORDER BY stream_ordering DESC LIMIT ?"
+                )
+                txn.execute(sql, (room_id, from_id, to_id, limit))
+            else:
+                sql = (
+                    "SELECT event_id, stream_ordering FROM events WHERE"
+                    " room_id = ?"
+                    " AND not outlier"
+                    " AND stream_ordering <= ?"
+                    " ORDER BY stream_ordering DESC LIMIT ?"
+                )
+                txn.execute(sql, (room_id, to_id, limit))
+
+            rows = self.cursor_to_dict(txn)
+
+            ret = self._get_events_txn(
+                txn,
+                [r["event_id"] for r in rows],
+                get_prev_content=True
+            )
+
+            ret.reverse()
+
+            self._set_before_and_after(ret, rows)
+
+            if rows:
+                key = "s%d" % min(r["stream_ordering"] for r in rows)
+            else:
+                # Assume we didn't get anything because there was nothing to
+                # get.
+                key = from_key
+
+            return ret, key
+
+        res = yield self.runInteraction("get_recent_room_events_stream_for_room", f)
+        defer.returnValue(res)
+
+    def get_room_changes_for_user(self, user_id, from_key, to_key):
+        if from_key is not None:
+            from_id = RoomStreamToken.parse_stream_token(from_key).stream
+        else:
+            from_id = None
+        to_id = RoomStreamToken.parse_stream_token(to_key).stream
+
+        if from_key == to_key:
+            return defer.succeed([])
+
+        def f(txn):
+            if from_id is not None:
+                sql = (
+                    "SELECT m.event_id, stream_ordering FROM events AS e, room_memberships AS m"
+                    " WHERE e.event_id = m.event_id"
+                    " AND m.user_id = ?"
+                    " AND e.stream_ordering > ? AND e.stream_ordering <= ?"
+                    " ORDER BY e.stream_ordering ASC"
+                )
+                txn.execute(sql, (user_id, from_id, to_id,))
+            else:
+                sql = (
+                    "SELECT m.event_id, stream_ordering FROM events AS e, room_memberships AS m"
+                    " WHERE e.event_id = m.event_id"
+                    " AND m.user_id = ?"
+                    " AND stream_ordering <= ?"
+                    " ORDER BY stream_ordering ASC"
+                )
+                txn.execute(sql, (user_id, to_id,))
+            rows = self.cursor_to_dict(txn)
+
+            ret = self._get_events_txn(
+                txn,
+                [r["event_id"] for r in rows],
+                get_prev_content=True
+            )
+
+            return ret
+
+        return self.runInteraction("get_room_changes_for_user", f)
+
     @log_function
     def get_room_events_stream(
         self,
diff --git a/synapse/util/caches/room_change_cache.py b/synapse/util/caches/room_change_cache.py
new file mode 100644
index 0000000000..3a873c9c30
--- /dev/null
+++ b/synapse/util/caches/room_change_cache.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.util.caches import cache_counter, caches_by_name
+
+
+from blist import sorteddict
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+class RoomStreamChangeCache(object):
+    """Keeps track of the stream_id of the latest change in rooms.
+
+    Given a list of rooms and stream key, it will give a subset of rooms that
+    may have changed since that key. If the key is too old then the cache
+    will simply return all rooms.
+    """
+    def __init__(self, name, current_key, size_of_cache=10000):
+        self._size_of_cache = size_of_cache
+        self._room_to_key = {}
+        self._cache = sorteddict()
+        self._earliest_known_key = current_key
+        self.name = name
+        caches_by_name[self.name] = self._cache
+
+    def get_room_has_changed(self, room_id, key):
+        if key <= self._earliest_known_key:
+            return True
+
+        room_key = self._room_to_key.get(room_id, None)
+        if room_key is None:
+            return True
+
+        if key < room_key:
+            return True
+
+        return False
+
+    def get_rooms_changed(self, store, room_ids, key):
+        """Returns subset of room ids that have had new things since the
+        given key. If the key is too old it will just return the given list.
+        """
+        if key > self._earliest_known_key:
+            keys = self._cache.keys()
+            i = keys.bisect_right(key)
+
+            result = set(
+                self._cache[k] for k in keys[i:]
+            ).intersection(room_ids)
+
+            cache_counter.inc_hits(self.name)
+        else:
+            result = room_ids
+            cache_counter.inc_misses(self.name)
+
+        return result
+
+    def room_has_changed(self, store, room_id, key):
+        """Informs the cache that the room has been changed at the given key.
+        """
+        if key > self._earliest_known_key:
+            old_key = self._room_to_key.get(room_id, None)
+            if old_key:
+                key = max(key, old_key)
+                self._cache.pop(old_key, None)
+            self._cache[key] = room_id
+
+            while len(self._cache) > self._size_of_cache:
+                k, r = self._cache.popitem()
+                self._earliest_key = max(k, self._earliest_key)
+                self._room_to_key.pop(r, None)
--
cgit 1.4.1

From e7febf4fbb1f1beb11e7a03252f6844f84af7f30 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 27 Jan 2016 17:11:04 +0000
Subject: PEP8

---
 synapse/storage/events.py   | 4 +++-
 synapse/storage/receipts.py | 2 --
 synapse/storage/stream.py   | 9 ++++++---
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 0dd1daaa2e..80187722ea 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -128,7 +128,9 @@ class EventsStore(SQLBaseStore):
                 is_new_state=is_new_state,
                 current_state=current_state,
             )
-            self._events_stream_cache.room_has_changed(None, event.room_id, stream_ordering)
+            self._events_stream_cache.room_has_changed(
+                None, event.room_id, stream_ordering
+            )
         except _RollbackButIsFineException:
             pass
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index b7a4e77748..7118368d97 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -15,12 +15,10 @@

 from ._base import SQLBaseStore
 from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached
-from synapse.util.caches import cache_counter, caches_by_name
 from synapse.util.caches.room_change_cache import RoomStreamChangeCache

 from twisted.internet import defer

-from blist import sorteddict
 import logging
 import ujson as json
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index 563e289c4e..0b22251790 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -262,7 +262,8 @@ class StreamStore(SQLBaseStore):
         def f(txn):
             if from_id is not None:
                 sql = (
-                    "SELECT m.event_id, stream_ordering FROM events AS e, room_memberships AS m"
+                    "SELECT m.event_id, stream_ordering FROM events AS e,"
+                    " room_memberships AS m"
                     " WHERE e.event_id = m.event_id"
                     " AND m.user_id = ?"
                     " AND e.stream_ordering > ? AND e.stream_ordering <= ?"
@@ -271,7 +272,8 @@ class StreamStore(SQLBaseStore):
                 txn.execute(sql, (user_id, from_id, to_id,))
             else:
                 sql = (
-                    "SELECT m.event_id, stream_ordering FROM events AS e, room_memberships AS m"
+                    "SELECT m.event_id, stream_ordering FROM events AS e,"
+                    " room_memberships AS m"
                     " WHERE e.event_id = m.event_id"
                     " AND m.user_id = ?"
                     " AND stream_ordering <= ?"
@@ -307,7 +309,8 @@ class StreamStore(SQLBaseStore):
                 "SELECT c.room_id FROM history_visibility AS h"
                 " INNER JOIN current_state_events AS c"
                 " ON h.event_id = c.event_id"
-                " WHERE c.room_id IN (%s) AND h.history_visibility = 'world_readable'" % (
+                " WHERE c.room_id IN (%s)"
+                " AND h.history_visibility = 'world_readable'" % (
                     ",".join(map(lambda _: "?", room_ids))
                 )
             )
--
cgit 1.4.1

From e1941442d442fe62570551071edfd936304697e7 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 28 Jan 2016 15:02:37 +0000
Subject: Invalidate caches properly. Remove unused arg

---
 synapse/storage/events.py                | 9 ++++++---
 synapse/storage/receipts.py              | 10 ++++++----
 synapse/storage/stream.py                | 2 +-
 synapse/util/caches/room_change_cache.py | 4 ++--
 4 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 80187722ea..2d2270b297 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -128,9 +128,6 @@ class EventsStore(SQLBaseStore):
                 is_new_state=is_new_state,
                 current_state=current_state,
             )
-            self._events_stream_cache.room_has_changed(
-                None, event.room_id, stream_ordering
-            )
         except _RollbackButIsFineException:
             pass
@@ -213,6 +210,12 @@ class EventsStore(SQLBaseStore):
         for event, _ in events_and_contexts:
             txn.call_after(self._invalidate_get_event_cache, event.event_id)

+        if not backfilled:
+            txn.call_after(
+                self._events_stream_cache.room_has_changed,
+                event.room_id, event.internal_metadata.stream_ordering,
+            )
+
         depth_updates = {}
         for event, _ in events_and_contexts:
             if event.internal_metadata.is_outlier():
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 7118368d97..5ffbfdec51 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -78,7 +78,7 @@ class ReceiptsStore(SQLBaseStore):

         if from_key:
             room_ids = yield self._receipts_stream_cache.get_rooms_changed(
-                self, room_ids, from_key
+                room_ids, from_key
             )

         results = yield self._get_linearized_receipts_for_rooms(
@@ -221,6 +221,11 @@ class ReceiptsStore(SQLBaseStore):
         # FIXME: This shouldn't invalidate the whole cache
         txn.call_after(self.get_linearized_receipts_for_room.invalidate_all)

+        txn.call_after(
+            self._receipts_stream_cache.room_has_changed,
+            room_id, stream_id
+        )
+
         # We don't want to clobber receipts for more recent events, so we
         # have to compare orderings of existing receipts
         sql = (
@@ -308,9 +313,6 @@ class ReceiptsStore(SQLBaseStore):
         stream_id_manager = yield self._receipts_id_gen.get_next(self)
         with stream_id_manager as stream_id:
-            yield self._receipts_stream_cache.room_has_changed(
-                self, room_id, stream_id
-            )
             have_persisted = yield self.runInteraction(
                 "insert_linearized_receipt",
                 self.insert_linearized_receipt_txn,
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index 5096b46864..67e7e6a76f 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -169,7 +169,7 @@ class StreamStore(SQLBaseStore):
         from_id = RoomStreamToken.parse_stream_token(from_key).stream

         room_ids = yield self._events_stream_cache.get_rooms_changed(
-            self, room_ids, from_id
+            room_ids, from_id
         )

         if not room_ids:
diff --git a/synapse/util/caches/room_change_cache.py b/synapse/util/caches/room_change_cache.py
index 3a873c9c30..eb2ab5f1e4 100644
--- a/synapse/util/caches/room_change_cache.py
+++ b/synapse/util/caches/room_change_cache.py
@@ -51,7 +51,7 @@ class RoomStreamChangeCache(object):

         return False

-    def get_rooms_changed(self, store, room_ids, key):
+    def get_rooms_changed(self, room_ids, key):
         """Returns subset of room ids that have had new things since the
         given key. If the key is too old it will just return the given list.
         """
@@ -70,7 +70,7 @@ class RoomStreamChangeCache(object):

         return result

-    def room_has_changed(self, store, room_id, key):
+    def room_has_changed(self, room_id, key):
         """Informs the cache that the room has been changed at the given key.
         """
         if key > self._earliest_known_key:
--
cgit 1.4.1

Args: @@ -120,6 +128,12 @@ class AccountDataStore(SQLBaseStore): return (global_account_data, account_data_by_room) + changed = self._account_data_stream_cache.get_entity_has_changed( + user_id, int(stream_id) + ) + if not changed: + defer.returnValue(({}, {})) + return self.runInteraction( "get_updated_account_data_for_user", get_updated_account_data_for_user_txn ) @@ -186,6 +200,10 @@ class AccountDataStore(SQLBaseStore): "content": content_json, } ) + txn.call_after( + self._account_data_stream_cache.entity_has_changed, + user_id, next_id, + ) self._update_max_stream_id(txn, next_id) with (yield self._account_data_id_gen.get_next(self)) as next_id: diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2d2270b297..5e85552029 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -212,7 +212,7 @@ class EventsStore(SQLBaseStore): if not backfilled: txn.call_after( - self._events_stream_cache.room_has_changed, + self._events_stream_cache.entity_has_changed, event.room_id, event.internal_metadata.stream_ordering, ) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 5ffbfdec51..8068c73740 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -15,7 +15,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached -from synapse.util.caches.room_change_cache import RoomStreamChangeCache +from synapse.util.caches.stream_change_cache import StreamChangeCache from twisted.internet import defer @@ -30,7 +30,7 @@ class ReceiptsStore(SQLBaseStore): def __init__(self, hs): super(ReceiptsStore, self).__init__(hs) - self._receipts_stream_cache = RoomStreamChangeCache( + self._receipts_stream_cache = StreamChangeCache( "ReceiptsRoomChangeCache", self._receipts_id_gen.get_max_token(None) ) @@ -77,7 +77,7 @@ class ReceiptsStore(SQLBaseStore): room_ids = set(room_ids) if from_key: - room_ids = yield self._receipts_stream_cache.get_rooms_changed( + room_ids = yield self._receipts_stream_cache.get_entities_changed( room_ids, from_key ) @@ -222,7 +222,7 @@ class ReceiptsStore(SQLBaseStore): txn.call_after(self.get_linearized_receipts_for_room.invalidate_all) txn.call_after( - self._receipts_stream_cache.room_has_changed, + self._receipts_stream_cache.entity_has_changed, room_id, stream_id ) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 6a724193e1..c7d7893328 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -37,7 +37,7 @@ from twisted.internet import defer from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks -from synapse.util.caches.room_change_cache import RoomStreamChangeCache +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken from synapse.util.logutils import log_function @@ -81,7 +81,7 @@ class StreamStore(SQLBaseStore): def __init__(self, hs): super(StreamStore, self).__init__(hs) - self._events_stream_cache = RoomStreamChangeCache( + self._events_stream_cache = StreamChangeCache( "EventsRoomStreamChangeCache", self._stream_id_gen.get_max_token(None) ) @@ -168,7 +168,7 @@ class StreamStore(SQLBaseStore): def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0): from_id = RoomStreamToken.parse_stream_token(from_key).stream - room_ids = yield self._events_stream_cache.get_rooms_changed( + room_ids = yield 
             room_ids, from_id
         )
 
@@ -200,7 +200,7 @@ class StreamStore(SQLBaseStore):
             defer.returnValue(([], from_key))
 
         if from_id:
-            has_changed = yield self._events_stream_cache.get_room_has_changed(
+            has_changed = yield self._events_stream_cache.get_entity_has_changed(
                 room_id, from_id
             )
diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py
index 4c39e07cbd..50af899192 100644
--- a/synapse/storage/tags.py
+++ b/synapse/storage/tags.py
@@ -15,6 +15,7 @@
 
 from ._base import SQLBaseStore
 from synapse.util.caches.descriptors import cached
+from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 from twisted.internet import defer
 
 import ujson as json
@@ -24,6 +25,13 @@ logger = logging.getLogger(__name__)
 
 
 class TagsStore(SQLBaseStore):
+    def __init__(self, hs):
+        super(TagsStore, self).__init__(hs)
+
+        self._tags_stream_cache = StreamChangeCache(
+            "TagsChangeCache", self._account_data_id_gen.get_max_token(None),
+            max_size=1000,
+        )
 
     def get_max_account_data_stream_id(self):
         """Get the current max stream id for the private user data stream
@@ -80,6 +88,10 @@ class TagsStore(SQLBaseStore):
             room_ids = [row[0] for row in txn.fetchall()]
             return room_ids
 
+        changed = self._tags_stream_cache.get_entity_has_changed(user_id, int(stream_id))
+        if not changed:
+            defer.returnValue({})
+
         room_ids = yield self.runInteraction(
             "get_updated_tags", get_updated_tags_txn
         )
@@ -177,6 +189,8 @@ class TagsStore(SQLBaseStore):
             next_id(int): The revision to advance to.
         """
 
+        txn.call_after(self._tags_stream_cache.entity_has_changed, user_id, next_id)
+
         update_max_id_sql = (
             "UPDATE account_data_max_stream_id"
             " SET stream_id = ?"
diff --git a/synapse/util/caches/room_change_cache.py b/synapse/util/caches/room_change_cache.py
deleted file mode 100644
index e8bfedd72f..0000000000
--- a/synapse/util/caches/room_change_cache.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.util.caches import cache_counter, caches_by_name
-
-
-from blist import sorteddict
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-class RoomStreamChangeCache(object):
-    """Keeps track of the stream_id of the latest change in rooms.
-
-    Given a list of rooms and stream key, it will give a subset of rooms that
-    may have changed since that key. If the key is too old then the cache
-    will simply return all rooms.
- """ - def __init__(self, name, current_key, size_of_cache=10000): - self._size_of_cache = size_of_cache - self._room_to_key = {} - self._cache = sorteddict() - self._earliest_known_key = current_key - self.name = name - caches_by_name[self.name] = self._cache - - def get_room_has_changed(self, room_id, key): - assert type(key) is int - - if key <= self._earliest_known_key: - return True - - room_key = self._room_to_key.get(room_id, None) - if room_key is None: - return True - - if key < room_key: - return True - - return False - - def get_rooms_changed(self, room_ids, key): - """Returns subset of room ids that have had new things since the - given key. If the key is too old it will just return the given list. - """ - assert type(key) is int - - if key > self._earliest_known_key: - keys = self._cache.keys() - i = keys.bisect_right(key) - - result = set( - self._cache[k] for k in keys[i:] - ).intersection(room_ids) - - cache_counter.inc_hits(self.name) - else: - result = room_ids - cache_counter.inc_misses(self.name) - - return result - - def room_has_changed(self, room_id, key): - """Informs the cache that the room has been changed at the given key. - """ - assert type(key) is int - - if key > self._earliest_known_key: - old_key = self._room_to_key.get(room_id, None) - if old_key: - key = max(key, old_key) - self._cache.pop(old_key, None) - self._cache[key] = room_id - - while len(self._cache) > self._size_of_cache: - k, r = self._cache.popitem() - self._earliest_key = max(k, self._earliest_key) - self._room_to_key.pop(r, None) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py new file mode 100644 index 0000000000..33b37f7f29 --- /dev/null +++ b/synapse/util/caches/stream_change_cache.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util.caches import cache_counter, caches_by_name + + +from blist import sorteddict +import logging + + +logger = logging.getLogger(__name__) + + +class StreamChangeCache(object): + """Keeps track of the stream positions of the latest change in a set of entities. + + Typically the entity will be a room or user id. + + Given a list of entities and a stream position, it will give a subset of + entities that may have changed since that position. If position key is too + old then the cache will simply return all given entities. 
+ """ + def __init__(self, name, current_stream_pos, max_size=10000): + self._max_size = max_size + self._entity_to_key = {} + self._cache = sorteddict() + self._earliest_known_stream_pos = current_stream_pos + self.name = name + caches_by_name[self.name] = self._cache + + def get_entity_has_changed(self, entity, stream_pos): + assert type(stream_pos) is int + + if stream_pos <= self._earliest_known_stream_pos: + return True + + latest_entity_change_pos = self._entity_to_key.get(entity, None) + if latest_entity_change_pos is None: + return True + + if stream_pos < latest_entity_change_pos: + return True + + return False + + def get_entities_changed(self, entities, stream_pos): + """Returns subset of entities that have had new things since the + given position. If the position is too old it will just return the given list. + """ + assert type(stream_pos) is int + + if stream_pos > self._earliest_known_stream_pos: + keys = self._cache.keys() + i = keys.bisect_right(stream_pos) + + result = set( + self._cache[k] for k in keys[i:] + ).intersection(entities) + + cache_counter.inc_hits(self.name) + else: + result = entities + cache_counter.inc_misses(self.name) + + return result + + def entity_has_changed(self, entitiy, stream_pos): + """Informs the cache that the entitiy has been changed at the given + position. + """ + assert type(stream_pos) is int + + if stream_pos > self._earliest_known_stream_pos: + old_pos = self._entity_to_key.get(entitiy, None) + if old_pos: + stream_pos = max(stream_pos, old_pos) + self._cache.pop(old_pos, None) + self._cache[stream_pos] = entitiy + + while len(self._cache) > self._max_size: + k, r = self._cache.popitem() + self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos) + self._entity_to_key.pop(r, None) -- cgit 1.4.1 From eff12e838ce10588ca8103c9131dcfe2f2e7950e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Feb 2016 13:55:59 +0000 Subject: Don't load all ephemeral state for a room on every sync --- synapse/handlers/sync.py | 20 ++++++-------------- synapse/storage/receipts.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'synapse/storage/receipts.py') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 446f8bbe93..6a5868f87e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -319,7 +319,6 @@ class SyncHandler(BaseHandler): ephemeral_by_room=ephemeral_by_room, tags_by_room=tags_by_room, account_data_by_room=account_data_by_room, - all_ephemeral_by_room=ephemeral_by_room, batch=batch, full_state=True, ) @@ -453,13 +452,6 @@ class SyncHandler(BaseHandler): ) now_token = now_token.copy_and_replace("presence_key", presence_key) - # We now fetch all ephemeral events for this room in order to get - # this users current read receipt. This could almost certainly be - # optimised. 
-        _, all_ephemeral_by_room = yield self.ephemeral_by_room(
-            sync_config, now_token
-        )
-
         now_token, ephemeral_by_room = yield self.ephemeral_by_room(
             sync_config, now_token, since_token
         )
@@ -591,7 +583,6 @@ class SyncHandler(BaseHandler):
             ephemeral_by_room=ephemeral_by_room,
             tags_by_room=tags_by_room,
             account_data_by_room=account_data_by_room,
-            all_ephemeral_by_room=all_ephemeral_by_room,
             batch=batch,
             full_state=full_state,
         )
@@ -691,7 +682,6 @@ class SyncHandler(BaseHandler):
                               since_token, now_token,
                               ephemeral_by_room, tags_by_room,
                               account_data_by_room,
-                              all_ephemeral_by_room,
                               batch, full_state=False):
         state = yield self.compute_state_delta(
             room_id, batch, sync_config, since_token, now_token,
@@ -722,7 +712,7 @@ class SyncHandler(BaseHandler):
 
         if room_sync:
             notifs = yield self.unread_notifs_for_room_id(
-                room_id, sync_config, all_ephemeral_by_room
+                room_id, sync_config
            )
 
             if notifs is not None:
@@ -906,10 +896,12 @@ class SyncHandler(BaseHandler):
         return False
 
     @defer.inlineCallbacks
-    def unread_notifs_for_room_id(self, room_id, sync_config, ephemeral_by_room):
+    def unread_notifs_for_room_id(self, room_id, sync_config):
         with Measure(self.clock, "unread_notifs_for_room_id"):
-            last_unread_event_id = self.last_read_event_id_for_room_and_user(
-                room_id, sync_config.user.to_string(), ephemeral_by_room
+            last_unread_event_id = yield self.store.get_last_receipt_event_id_for_user(
+                user_id=sync_config.user.to_string(),
+                room_id=room_id,
+                receipt_type="m.read"
             )
 
             notifs = []
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 8068c73740..1aff9f070e 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -46,6 +46,20 @@ class ReceiptsStore(SQLBaseStore):
             desc="get_receipts_for_room",
         )
 
+    @cached(num_args=3)
+    def get_last_receipt_event_id_for_user(self, user_id, room_id, receipt_type):
+        return self._simple_select_one_onecol(
+            table="receipts_linearized",
+            keyvalues={
+                "room_id": room_id,
+                "receipt_type": receipt_type,
+                "user_id": user_id
+            },
+            retcol="event_id",
+            desc="get_last_receipt_event_id_for_user",
+            allow_none=True,
+        )
+
     @cachedInlineCallbacks(num_args=2)
     def get_receipts_for_user(self, user_id, receipt_type):
         def f(txn):
--
cgit 1.4.1


From 70a8608749e0c1ec7a993a9effc424303af24738 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 9 Feb 2016 14:27:29 +0000
Subject: Invalidate get_last_receipt_event_id_for_user cache

---
 synapse/storage/receipts.py | 5 +++++
 1 file changed, 5 insertions(+)
(limited to 'synapse/storage/receipts.py')

diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 1aff9f070e..4202a6b3dc 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -240,6 +240,11 @@ class ReceiptsStore(SQLBaseStore):
             room_id, stream_id
         )
 
+        txn.call_after(
+            self.get_last_receipt_event_id_for_user.invalidate,
+            (user_id, room_id, receipt_type)
+        )
+
         # We don't want to clobber receipts for more recent events, so we
         # have to compare orderings of existing receipts
         sql = (
--
cgit 1.4.1
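
To make the contract of the new StreamChangeCache concrete, here is a minimal usage
sketch. It is an illustration only, not part of the patch series; the room ids and
stream positions are invented, and it assumes the class exactly as added in
synapse/util/caches/stream_change_cache.py above.

    # Usage sketch for StreamChangeCache (illustration only; ids and
    # positions are invented).
    from synapse.util.caches.stream_change_cache import StreamChangeCache

    cache = StreamChangeCache("ExampleChangeCache", 100)

    # Record that two entities changed at later stream positions.
    cache.entity_has_changed("!room_a:example.com", 105)
    cache.entity_has_changed("!room_b:example.com", 110)

    # A caller already synced up to position 107 only needs to refetch room_b.
    changed = cache.get_entities_changed(
        ["!room_a:example.com", "!room_b:example.com"], 107
    )
    assert changed == set(["!room_b:example.com"])

    # At or before the earliest known position the cache holds no history, so
    # it conservatively reports that the entity may have changed.
    assert cache.get_entity_has_changed("!room_a:example.com", 90) is True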
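
Likewise, the last two commits replace the per-sync scan over all ephemeral events
with a single cached lookup. The sketch below shows how a store consumer might call
the new accessor; the wrapper function and ids are hypothetical, not code from the
patches.

    # Hypothetical consumer of the new cached accessor (ids invented).
    # allow_none=True in the store method means the lookup yields None when
    # the user has no m.read receipt in the room.
    from twisted.internet import defer

    @defer.inlineCallbacks
    def last_read_event_id(store, user_id, room_id):
        event_id = yield store.get_last_receipt_event_id_for_user(
            user_id=user_id,
            room_id=room_id,
            receipt_type="m.read",
        )
        defer.returnValue(event_id)

    # e.g. last_read_event_id(store, "@alice:example.com", "!room_a:example.com")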