From b30cd5b107acafce43fb63c471c086b8df4d981a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:31:00 +0000 Subject: Remove dead code related to default thumbnails --- synapse/storage/media_repository.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index a66ff7c1e0..6ebc372498 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -29,9 +29,6 @@ class MediaRepositoryStore(BackgroundUpdateStore): where_clause='url_cache IS NOT NULL', ) - def get_default_thumbnails(self, top_level_type, sub_type): - return [] - def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: -- cgit 1.5.1 From 19f9227643b5099666878de33453bbe361f216fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:25:04 +0000 Subject: avoid 80s GIN inserts by tweaking work_mem see https://github.com/matrix-org/synapse/issues/2753 for details --- synapse/storage/search.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c636..7b1166f417 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,6 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" @@ -123,6 +124,9 @@ class SearchStore(BackgroundUpdateStore): clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] txn.executemany(sql, clump) + if isinstance(self.database_engine, PostgresEngine): + txn.execute("RESET work_mem") + progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, -- cgit 1.5.1 From e365ad329f3c7e12bb2126217acbc62bdf0b9aec Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:30:30 +0000 Subject: oops, tweak work_mem when actually storing --- synapse/storage/room.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b7..9e2bf1ab48 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,6 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" @@ -323,6 +324,7 @@ class RoomStore(SQLBaseStore): event.origin_server_ts, ) ) + txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" -- cgit 1.5.1 From 174eacc8ba71015003a78594ebc89cbe45d8384a Mon Sep 17 00:00:00 2001 From: hera Date: Tue, 9 Jan 2018 18:06:30 +0000 Subject: oops --- synapse/storage/room.py | 2 +- synapse/storage/search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9e2bf1ab48..0604f8f270 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,7 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7b1166f417..f52f3c8592 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,7 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" -- cgit 1.5.1 From 64ddec1bc0a1d23a285d560e34986441b3f8c854 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 11:47:36 +0000 Subject: Fix a logcontext leak in persist_events ObserveableDeferred expects its callbacks to be called without any logcontexts, whereas it turns out we were calling them with the logcontext of the request which initiated the persistence loop. It seems wrong that we are attributing work done in the persistence loop to the request that happened to initiate it, so let's solve this by dropping the logcontext for it. (I'm not sure this actually causes any real problems other than messages in the debug log, but let's clean it up anyway) --- synapse/storage/events.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d7..ad1d782705 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -146,6 +146,9 @@ class _EventPeristenceQueue(object): try: queue = self._get_drainining_queue(room_id) for item in queue: + # handle_queue_loop runs in the sentinel logcontext, so + # there is no need to preserve_fn when running the + # callbacks on the deferred. try: ret = yield per_item_callback(item) item.deferred.callback(ret) @@ -157,7 +160,11 @@ class _EventPeristenceQueue(object): self._event_persist_queues[room_id] = queue self._currently_persisting_rooms.discard(room_id) - preserve_fn(handle_queue_loop)() + # set handle_queue_loop off on the background. We don't want to + # attribute work done in it to the current request, so we drop the + # logcontext altogether. + with PreserveLoggingContext(): + handle_queue_loop() def _get_drainining_queue(self, room_id): queue = self._event_persist_queues.setdefault(room_id, deque()) -- cgit 1.5.1 From 8615f19d20f4a2048773b60ce840aab48f3e11b0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 16:17:24 +0000 Subject: rework runInteraction in terms of runConnection ... so that we can share the code --- synapse/storage/_base.py | 51 +++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b971f0cb18..986617674c 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -291,33 +291,33 @@ class SQLBaseStore(object): @defer.inlineCallbacks def runInteraction(self, desc, func, *args, **kwargs): - """Wraps the .runInteraction() method on the underlying db_pool.""" - current_context = LoggingContext.current_context() + """Starts a transaction on the database and runs a given function - start_time = time.time() * 1000 + Arguments: + desc (str): description of the transaction, for logging and metrics + func (func): callback function, which will be called with a + database transaction (twisted.enterprise.adbapi.Transaction) as + its first argument, followed by `args` and `kwargs`. + + args (list): positional args to pass to `func` + kwargs (dict): named args to pass to `func` + + Returns: + Deferred: The result of func + """ + current_context = LoggingContext.current_context() after_callbacks = [] final_callbacks = [] def inner_func(conn, *args, **kwargs): - with LoggingContext("runInteraction") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) - - if self.database_engine.is_connection_closed(conn): - logger.debug("Reconnecting closed database connection") - conn.reconnect() - - current_context.copy_to(context) - return self._new_transaction( - conn, desc, after_callbacks, final_callbacks, current_context, - func, *args, **kwargs - ) + return self._new_transaction( + conn, desc, after_callbacks, final_callbacks, current_context, + func, *args, **kwargs + ) try: - with PreserveLoggingContext(): - result = yield self._db_pool.runWithConnection( - inner_func, *args, **kwargs - ) + result = yield self.runWithConnection(inner_func, *args, **kwargs) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) @@ -329,7 +329,18 @@ class SQLBaseStore(object): @defer.inlineCallbacks def runWithConnection(self, func, *args, **kwargs): - """Wraps the .runInteraction() method on the underlying db_pool.""" + """Wraps the .runWithConnection() method on the underlying db_pool. + + Arguments: + func (func): callback function, which will be called with a + database connection (twisted.enterprise.adbapi.Connection) as + its first argument, followed by `args` and `kwargs`. + args (list): positional args to pass to `func` + kwargs (dict): named args to pass to `func` + + Returns: + Deferred: The result of func + """ current_context = LoggingContext.current_context() start_time = time.time() * 1000 -- cgit 1.5.1 From 3d12d97415ac6d6a4ab8188af31c7df12c5d19f8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 00:27:14 +0000 Subject: Track DB scheduling delay per-request For each request, track the amount of time spent waiting for a db connection. This entails adding it to the LoggingContext and we may as well add metrics for it while we are passing. --- synapse/http/server.py | 7 +++++++ synapse/http/site.py | 4 +++- synapse/storage/_base.py | 4 +++- synapse/util/logcontext.py | 18 +++++++++++++++++- synapse/util/metrics.py | 14 +++++++++++++- 5 files changed, 43 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/http/server.py b/synapse/http/server.py index 0f30e6fd56..7b6418bc2c 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -102,6 +102,10 @@ response_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, when processing this request +response_db_sched_duration = metrics.register_counter( + "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] +) _next_request_id = 0 @@ -381,6 +385,9 @@ class RequestMetrics(object): response_db_txn_duration.inc_by( context.db_txn_duration_ms / 1000., request.method, self.name, tag ) + response_db_sched_duration.inc_by( + context.db_sched_duration_ms / 1000., request.method, self.name, tag + ) class RootRedirect(resource.Resource): diff --git a/synapse/http/site.py b/synapse/http/site.py index dc64f0f6f5..e422c8dfae 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -67,13 +67,14 @@ class SynapseRequest(Request): ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_ms = context.db_txn_duration_ms + db_sched_duration_ms = context.db_sched_duration_ms except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%d)" + " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, @@ -81,6 +82,7 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), + db_sched_duration_ms, db_txn_duration_ms, int(db_txn_count), self.sentLength, diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 986617674c..68125006eb 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -347,7 +347,9 @@ class SQLBaseStore(object): def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) + sched_duration_ms = time.time() * 1000 - start_time + sql_scheduling_timer.inc_by(sched_duration_ms) + current_context.add_database_scheduled(sched_duration_ms) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a78e53812f..94fa7cac98 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,8 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "usage_start", "usage_end", "main_thread", "alive", "request", "tag", ] @@ -86,6 +87,9 @@ class LoggingContext(object): def add_database_transaction(self, duration_ms): pass + def add_database_scheduled(self, sched_ms): + pass + def __nonzero__(self): return False @@ -101,6 +105,9 @@ class LoggingContext(object): # ms spent waiting for db txns, excluding scheduling time self.db_txn_duration_ms = 0 + # ms spent waiting for db txns to be scheduled + self.db_sched_duration_ms = 0 + self.usage_start = None self.usage_end = None self.main_thread = threading.current_thread() @@ -210,6 +217,15 @@ class LoggingContext(object): self.db_txn_count += 1 self.db_txn_duration_ms += duration_ms + def add_database_scheduled(self, sched_ms): + """Record a use of the database pool + + Args: + sched_ms (int): number of milliseconds it took us to get a + connection + """ + self.db_sched_duration_ms += sched_ms + class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index d25629cc50..059bb7fedf 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -80,6 +80,11 @@ block_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, in this block +block_db_sched_duration = metrics.register_counter( + "block_db_sched_duration_seconds", labels=["block_name"], +) + def measure_func(name): def wrapper(func): @@ -96,7 +101,9 @@ def measure_func(name): class Measure(object): __slots__ = [ "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" + "ru_stime", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "created_context", ] def __init__(self, clock, name): @@ -117,6 +124,7 @@ class Measure(object): self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() self.db_txn_count = self.start_context.db_txn_count self.db_txn_duration_ms = self.start_context.db_txn_duration_ms + self.db_sched_duration_ms = self.start_context.db_sched_duration_ms def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -149,6 +157,10 @@ class Measure(object): (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., self.name ) + block_db_sched_duration.inc_by( + (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000., + self.name + ) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) -- cgit 1.5.1 From 05f98a22249974ce40a461d12da93af0bc624319 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 16:42:43 +0000 Subject: Keep track of last access time for local media --- synapse/rest/media/v1/media_repository.py | 32 +++++++++++++++++----- synapse/storage/media_repository.py | 23 ++++++++++++++-- synapse/storage/prepare_database.py | 2 +- .../storage/schema/delta/47/last_access_media.sql | 19 +++++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 synapse/storage/schema/delta/47/last_access_media.sql (limited to 'synapse/storage') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150e..b2c76440b7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -53,7 +53,7 @@ import urlparse logger = logging.getLogger(__name__) -UPDATE_RECENTLY_ACCESSED_REMOTES_TS = 60 * 1000 +UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 class MediaRepository(object): @@ -75,6 +75,7 @@ class MediaRepository(object): self.remote_media_linearizer = Linearizer(name="media_remote") self.recently_accessed_remotes = set() + self.recently_accessed_locals = set() # List of StorageProviders where we should search for media and # potentially upload to. @@ -99,19 +100,34 @@ class MediaRepository(object): ) self.clock.looping_call( - self._update_recently_accessed_remotes, - UPDATE_RECENTLY_ACCESSED_REMOTES_TS + self._update_recently_accessed, + UPDATE_RECENTLY_ACCESSED_TS, ) @defer.inlineCallbacks - def _update_recently_accessed_remotes(self): - media = self.recently_accessed_remotes + def _update_recently_accessed(self): + remote_media = self.recently_accessed_remotes self.recently_accessed_remotes = set() + local_media = self.recently_accessed_locals + self.recently_accessed_locals = set() + yield self.store.update_cached_last_access_time( - media, self.clock.time_msec() + local_media, remote_media, self.clock.time_msec() ) + def mark_recently_accessed(self, server_name, media_id): + """Mark the given media as recently accessed. + + Args: + server_name (str|None): Origin server of media, or None if local + media_id (str): The media ID of the content + """ + if server_name: + self.recently_accessed_remotes.add((server_name, media_id)) + else: + self.recently_accessed_locals.add(media_id) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -173,6 +189,8 @@ class MediaRepository(object): respond_404(request) return + self.mark_recently_accessed(None, media_id) + media_type = media_info["media_type"] media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] @@ -204,7 +222,7 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ - self.recently_accessed_remotes.add((server_name, media_id)) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote # media multiple times concurrently diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6ebc372498..e6cdbb0545 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -173,7 +173,14 @@ class MediaRepositoryStore(BackgroundUpdateStore): desc="store_cached_remote_media", ) - def update_cached_last_access_time(self, origin_id_tuples, time_ts): + def update_cached_last_access_time(self, local_media, remote_media, time_ms): + """Updates the last access time of the given media + + Args: + local_media (iterable[str]): Set of media_ids + remote_media (iterable[(str, str)]): Set of (server_name, media_id) + time_ms: Current time in milliseconds + """ def update_cache_txn(txn): sql = ( "UPDATE remote_media_cache SET last_access_ts = ?" @@ -181,8 +188,18 @@ class MediaRepositoryStore(BackgroundUpdateStore): ) txn.executemany(sql, ( - (time_ts, media_origin, media_id) - for media_origin, media_id in origin_id_tuples + (time_ms, media_origin, media_id) + for media_origin, media_id in remote_media + )) + + sql = ( + "UPDATE local_media_repository SET last_access_ts = ?" + " WHERE media_id = ?" + ) + + txn.executemany(sql, ( + (time_ms, media_id) + for media_id in local_media )) return self.runInteraction("update_cached_last_access_time", update_cache_txn) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d1691bbac2..c845a0cec5 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 46 +SCHEMA_VERSION = 47 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql new file mode 100644 index 0000000000..bc754ac861 --- /dev/null +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- drop the unique constraint on deleted_pushers so that we can just insert +-- into it rather than upserting. + +ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; -- cgit 1.5.1 From 2fb3a28c9894f2cd1ed2ba3404a519d0bbd754cc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 14:59:44 +0000 Subject: Remove lost comment --- synapse/storage/schema/delta/47/last_access_media.sql | 3 --- 1 file changed, 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql index bc754ac861..f505fb22b5 100644 --- a/synapse/storage/schema/delta/47/last_access_media.sql +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -13,7 +13,4 @@ * limitations under the License. */ --- drop the unique constraint on deleted_pushers so that we can just insert --- into it rather than upserting. - ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; -- cgit 1.5.1 From 390093d45e1951b1a1d8a034667d2e84b3bf064d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 17 Jan 2018 15:44:31 +0000 Subject: Split resolve_events into two functions ... so that the return type doesn't depend on the arg types --- synapse/state.py | 45 +++++++++++++++++++++++++++------------------ synapse/storage/events.py | 4 ++-- 2 files changed, 29 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/state.py b/synapse/state.py index 9e624b4937..1f9abf9d3d 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -341,7 +341,7 @@ class StateHandler(object): if conflicted_state: logger.info("Resolving conflicted state for %r", room_id) with Measure(self.clock, "state._resolve_events"): - new_state = yield resolve_events( + new_state = yield resolve_events_with_factory( state_groups_ids.values(), state_map_factory=lambda ev_ids: self.store.get_events( ev_ids, get_prev_content=False, check_redacted=False, @@ -404,7 +404,7 @@ class StateHandler(object): } with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events(state_set_ids, state_map) + new_state = resolve_events_with_state_map(state_set_ids, state_map) new_state = { key: state_map[ev_id] for key, ev_id in new_state.items() @@ -420,19 +420,17 @@ def _ordered_events(events): return sorted(events, key=key_func) -def resolve_events(state_sets, state_map_factory): +def resolve_events_with_state_map(state_sets, state_map): """ Args: state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. - state_map_factory(dict|callable): If callable, then will be called - with a list of event_ids that are needed, and should return with - a Deferred of dict of event_id to event. Otherwise, should be - a dict from event_id to event of all events in state_sets. + state_map(dict): a dict from event_id to event, for all events in + state_sets. Returns - dict[(str, str), synapse.events.FrozenEvent] is a map from - (type, state_key) to event. + dict[(str, str), synapse.events.FrozenEvent]: + a map from (type, state_key) to event. """ if len(state_sets) == 1: return state_sets[0] @@ -441,13 +439,6 @@ def resolve_events(state_sets, state_map_factory): state_sets, ) - if callable(state_map_factory): - return _resolve_with_state_fac( - unconflicted_state, conflicted_state, state_map_factory - ) - - state_map = state_map_factory - auth_events = _create_auth_events_from_maps( unconflicted_state, conflicted_state, state_map ) @@ -491,8 +482,26 @@ def _seperate(state_sets): @defer.inlineCallbacks -def _resolve_with_state_fac(unconflicted_state, conflicted_state, - state_map_factory): +def resolve_events_with_factory(state_sets, state_map_factory): + """ + Args: + state_sets(list): List of dicts of (type, state_key) -> event_id, + which are the different state groups to resolve. + state_map_factory(func): will be called + with a list of event_ids that are needed, and should return with + a Deferred of dict of event_id to event. + + Returns + Deferred[dict[(str, str), synapse.events.FrozenEvent]]: + a map from (type, state_key) to event. + """ + if len(state_sets) == 1: + defer.returnValue(state_sets[0]) + + unconflicted_state, conflicted_state = _seperate( + state_sets, + ) + needed_events = set( event_id for event_ids in conflicted_state.itervalues() diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ad1d782705..c5292a5311 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -27,7 +27,7 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.state import resolve_events +from synapse.state import resolve_events_with_factory from synapse.util.caches.descriptors import cached from synapse.types import get_domain_from_id @@ -557,7 +557,7 @@ class EventsStore(SQLBaseStore): to_return.update(evs) defer.returnValue(to_return) - current_state = yield resolve_events( + current_state = yield resolve_events_with_factory( state_sets, state_map_factory=get_events, ) -- cgit 1.5.1 From 2d9ab533f9faa3f98eea166b05d3a0fb7fc2f80c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 17 Jan 2018 15:58:52 +0000 Subject: fix SQL when searching all users --- synapse/storage/user_directory.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index c9bff408ef..f150ef0103 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,8 +641,13 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - join_clause = "" - where_clause = "?<>''" # naughty hack to keep the same number of binds + # dummy to keep the number of binds & aliases the same + join_clause = """ + LEFT JOIN ( + SELECT NULL as user_id WHERE NULL = ? + ) AS s USING (user_id)" + """ + where_clause = "" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) -- cgit 1.5.1 From 1224612a798ce9f14f0d44e1246f87da15a959f1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 17 Jan 2018 16:01:59 +0000 Subject: Log room when doing state resolution Mostly because it helps figure out what is prompting the resolution --- synapse/storage/events.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d7..ba0da83642 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -528,6 +528,12 @@ class EventsStore(SQLBaseStore): # the events we have yet to persist, so we need a slightly more # complicated event lookup function than simply looking the events # up in the db. + + logger.info( + "Resolving state for %s with %i state sets", + room_id, len(state_sets), + ) + events_map = {ev.event_id: ev for ev, _ in events_context} @defer.inlineCallbacks -- cgit 1.5.1 From 5552ed9a7fb1300142a7aebe7fc85b0bd2535bcf Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:25:23 -0700 Subject: Add an admin route to get all the media in a room This is intended to be used by administrators to monitor the media that is passing through their server, if they wish. Signed-off-by: Travis Ralston --- synapse/rest/client/v1/admin.py | 22 +++++++ synapse/storage/room.py | 131 +++++++++++++++++++++++----------------- 2 files changed, 97 insertions(+), 56 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea9..0615e5d807 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -289,6 +289,27 @@ class QuarantineMediaInRoom(ClientV1RestServlet): defer.returnValue((200, {"num_quarantined": num_quarantined})) +class ListMediaInRoom(ClientV1RestServlet): + """Lists all of the media in a given room. + """ + PATTERNS = client_path_patterns("/admin/room/(?P[^/]+)/media") + + def __init__(self, hs): + super(ListMediaInRoom, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + + defer.returnValue((200, {"local": local_mxcs, "remote": remote_mxcs})) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. @@ -487,3 +508,4 @@ def register_servlets(hs, http_server): SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) QuarantineMediaInRoom(hs).register(http_server) + ListMediaInRoom(hs).register(http_server) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b7..cd6899a4b5 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -533,73 +533,92 @@ class RoomStore(SQLBaseStore): ) self.is_room_blocked.invalidate((room_id,)) + def get_media_mxcs_in_room(self, room_id): + def _get_media_ids_in_room(txn): + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + local_media_mxcs = [] + remote_media_mxcs = [] + + # Convert the IDs to MXC URIs + for media_id in local_media_ids: + local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) + for hostname, media_id in remote_media_ids: + remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs + return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _get_media_ids_in_room(txn): - mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + def _quarantine_media_in_room(txn): + local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) + total_media_quarantined = 0 - next_token = self.get_current_events_token() + 1 + # Now update all the tables to set the quarantined_by flag - total_media_quarantined = 0 + txn.executemany(""" + UPDATE local_media_repository + SET quarantined_by = ? + WHERE media_id = ? + """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) - while next_token: - sql = """ - SELECT stream_ordering, content FROM events - WHERE room_id = ? - AND stream_ordering < ? - AND contains_url = ? AND outlier = ? - ORDER BY stream_ordering DESC - LIMIT ? + txn.executemany( """ - txn.execute(sql, (room_id, next_token, True, False, 100)) - - next_token = None - local_media_mxcs = [] - remote_media_mxcs = [] - for stream_ordering, content_json in txn: - next_token = stream_ordering - content = json.loads(content_json) - - content_url = content.get("url") - thumbnail_url = content.get("info", {}).get("thumbnail_url") - - for url in (content_url, thumbnail_url): - if not url: - continue - matches = mxc_re.match(url) - if matches: - hostname = matches.group(1) - media_id = matches.group(2) - if hostname == self.hostname: - local_media_mxcs.append(media_id) - else: - remote_media_mxcs.append((hostname, media_id)) - - # Now update all the tables to set the quarantined_by flag - - txn.executemany(""" - UPDATE local_media_repository + UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) - - txn.executemany( - """ - UPDATE remote_media_cache - SET quarantined_by = ? - WHERE media_origin AND media_id = ? - """, - ( - (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs - ) + WHERE media_origin AND media_id = ? + """, + ( + (quarantined_by, origin, media_id) + for origin, media_id in remote_media_mxcs ) + ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_mxcs) + total_media_quarantined += len(remote_media_mxcs) return total_media_quarantined - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + + def _get_media_ids_in_room(self, txn, room_id): + mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + + next_token = self.get_current_events_token() + 1 + local_media_mxcs = [] + remote_media_mxcs = [] + + while next_token: + sql = """ + SELECT stream_ordering, content FROM events + WHERE room_id = ? + AND stream_ordering < ? + AND contains_url = ? AND outlier = ? + ORDER BY stream_ordering DESC + LIMIT ? + """ + txn.execute(sql, (room_id, next_token, True, False, 100)) + + next_token = None + for stream_ordering, content_json in txn: + next_token = stream_ordering + content = json.loads(content_json) + + content_url = content.get("url") + thumbnail_url = content.get("info", {}).get("thumbnail_url") + + for url in (content_url, thumbnail_url): + if not url: + continue + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs -- cgit 1.5.1 From a94d9b6b825c6b2db375460268567e637e10709a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:49:46 -0700 Subject: Appease the linter These are ids anyways, not mxc uris. Signed-off-by: Travis Ralston --- synapse/storage/room.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cd6899a4b5..d1d63f4041 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -553,7 +553,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room(txn): - local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + """, ((quarantined_by, media_id) for media_id in local_media_ids)) txn.executemany( """ @@ -572,12 +572,12 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs + for origin, media_id in remote_media_ids ) ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_ids) + total_media_quarantined += len(remote_media_ids) return total_media_quarantined -- cgit 1.5.1 From 46022025ea35895af3cf8d15973fb94a3a6b4f38 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:20:28 +0000 Subject: Fix SQL for user search fix some syntax errors for user search when search_all_users is enabled fixes #2801, hopefully --- synapse/storage/user_directory.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index f150ef0103..dfdcbb3181 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,13 +641,12 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # dummy to keep the number of binds & aliases the same + # make s.user_id null to keep the ordering algorithm happy join_clause = """ - LEFT JOIN ( - SELECT NULL as user_id WHERE NULL = ? - ) AS s USING (user_id)" + CROSS JOIN (SELECT NULL as user_id) AS s """ - where_clause = "" + join_args = () + where_clause = "1=1" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) @@ -656,6 +655,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) """ + join_args = (user_id,) where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): @@ -697,7 +697,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, full_query, exact_query, prefix_query, limit + 1,) + args = join_args + (full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -715,7 +715,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, search_query, limit + 1) + args = join_args + (search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") -- cgit 1.5.1 From b387ee17b68e4398a8fa26fdf122b773a046e429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 14:00:11 +0000 Subject: Improve exception handling in persist_event 1. use `deferred.errback()` instead of `deferred.errback(e)`, which means that a Failure object will be constructed using the current exception state, *including* its stack trace - so the stack trace is saved in the Failure, leading to better exception reports. 2. Set `consumeErrors=True` on the ObservableDeferred, because we know that there will always be at least one observer - which avoids a spurious "CRITICAL: unhandled exception in Deferred" error in the logs --- synapse/storage/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec90..33fccfa7a8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -110,7 +110,7 @@ class _EventPeristenceQueue(object): end_item.events_and_contexts.extend(events_and_contexts) return end_item.deferred.observe() - deferred = ObservableDeferred(defer.Deferred()) + deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True) queue.append(self._EventPersistQueueItem( events_and_contexts=events_and_contexts, @@ -152,8 +152,8 @@ class _EventPeristenceQueue(object): try: ret = yield per_item_callback(item) item.deferred.callback(ret) - except Exception as e: - item.deferred.errback(e) + except Exception: + item.deferred.errback() finally: queue = self._event_persist_queues.pop(room_id, None) if queue: -- cgit 1.5.1 From 773f0eed1efa114bb32f6e54e8edc038a04d3526 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 15:02:51 +0000 Subject: Fix sql error in quarantine_media --- synapse/storage/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b7..d91c853070 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -589,7 +589,7 @@ class RoomStore(SQLBaseStore): """ UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_origin AND media_id = ? + WHERE media_origin = ? AND media_id = ? """, ( (quarantined_by, origin, media_id) -- cgit 1.5.1 From 63c4383927cfb759046ccf576e0c7e35a70f6168 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:07:41 -0700 Subject: Documentation and naming Signed-off-by: Travis Ralston --- synapse/storage/room.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index d1d63f4041..5dfb0e19f7 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -534,8 +534,17 @@ class RoomStore(SQLBaseStore): self.is_room_blocked.invalidate((room_id,)) def get_media_mxcs_in_room(self, room_id): - def _get_media_ids_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. + """ + def _get_media_mxcs_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] @@ -546,14 +555,14 @@ class RoomStore(SQLBaseStore): remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("get_media_ids_in_room", _get_media_mxcs_in_room_txn) def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _quarantine_media_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + def _quarantine_media_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -581,9 +590,19 @@ class RoomStore(SQLBaseStore): return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) - def _get_media_ids_in_room(self, txn, room_id): + def _get_media_mxcs_in_room_txn(self, txn, room_id): + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + txn (cursor) + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. + """ mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") next_token = self.get_current_events_token() + 1 -- cgit 1.5.1 From e1e4ec9f9d6570e7f5a3f519113516f47ec872e4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:43:40 +0000 Subject: factor _get_new_state_after_events out of _calculate_state_delta This reduces the scope of a bunch of variables --- synapse/storage/events.py | 57 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 18 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 33fccfa7a8..dd28c2efe3 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -386,11 +386,18 @@ class EventsStore(SQLBaseStore): if all_single_prev_not_state: continue - state = yield self._calculate_state_delta( - room_id, ev_ctx_rm, new_latest_event_ids + logger.info( + "Calculating state delta for room %s", room_id, ) - if state: - current_state_for_room[room_id] = state + current_state = yield self._get_new_state_after_events( + ev_ctx_rm, new_latest_event_ids, + ) + if current_state is not None: + delta = yield self._calculate_state_delta( + room_id, current_state, + ) + if delta is not None: + current_state_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -467,20 +474,22 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _calculate_state_delta(self, room_id, events_context, new_latest_event_ids): - """Calculate the new state deltas for a room. + def _get_new_state_after_events(self, events_context, new_latest_event_ids): + """Calculate the current state dict after adding some new events to + a room - Assumes that we are only persisting events for one room at a time. + Args: + events_context (list[(EventBase, EventContext)]): + events and contexts which are being added to the room + + new_latest_event_ids (iterable[str]): + the new forward extremities for the room. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to - first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. - May return None if there are no changes to be applied. + Deferred[dict[(str,str), str]|None]: + None if there are no changes to the room state, or + a dict of (type, state_key) -> event_id]. """ - # Now we need to work out the different state sets for - # each state extremities state_sets = [] state_groups = set() missing_event_ids = [] @@ -523,12 +532,12 @@ class EventsStore(SQLBaseStore): state_sets.extend(group_to_state.itervalues()) if not new_latest_event_ids: - current_state = {} + defer.returnValue({}) elif was_updated: if len(state_sets) == 1: # If there is only one state set, then we know what the current # state is. - current_state = state_sets[0] + defer.returnValue(state_sets[0]) else: # We work out the current state by passing the state sets to the # state resolution algorithm. It may ask for some events, including @@ -537,8 +546,7 @@ class EventsStore(SQLBaseStore): # up in the db. logger.info( - "Resolving state for %s with %i state sets", - room_id, len(state_sets), + "Resolving state with %i state sets", len(state_sets), ) events_map = {ev.event_id: ev for ev, _ in events_context} @@ -567,9 +575,22 @@ class EventsStore(SQLBaseStore): state_sets, state_map_factory=get_events, ) + defer.returnValue(current_state) else: return + @defer.inlineCallbacks + def _calculate_state_delta(self, room_id, current_state): + """Calculate the new state deltas for a room. + + Assumes that we are only persisting events for one room at a time. + + Returns: + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to + first be deleted from current_state_events, `to_insert` are entries + to insert. `new_state` is the full set of state. + """ existing_state = yield self.get_current_state_ids(room_id) existing_events = set(existing_state.itervalues()) -- cgit 1.5.1 From e16e45b1b44c9b9f7d44e6b50406268869759397 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 15:30:38 -0700 Subject: pep8 Signed-off-by: Travis Ralston --- synapse/storage/room.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5dfb0e19f7..961ad5abca 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -544,14 +544,14 @@ class RoomStore(SQLBaseStore): the hostname and the value is the media ID. """ def _get_media_mxcs_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] # Convert the IDs to MXC URIs - for media_id in local_media_ids: + for media_id in local_mxcs: local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) - for hostname, media_id in remote_media_ids: + for hostname, media_id in remote_mxcs: remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -571,7 +571,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_ids)) + """, ((quarantined_by, media_id) for media_id in local_mxcs)) txn.executemany( """ @@ -581,16 +581,19 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_ids + for origin, media_id in remote_mxcs ) ) - total_media_quarantined += len(local_media_ids) - total_media_quarantined += len(remote_media_ids) + total_media_quarantined += len(local_mxcs) + total_media_quarantined += len(remote_mxcs) return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) + return self.runInteraction( + "quarantine_media_in_room", + _quarantine_media_in_room_txn, + ) def _get_media_mxcs_in_room_txn(self, txn, room_id): """Retrieves all the local and remote media MXC URIs in a given room -- cgit 1.5.1 From 4eeae7ad657729eb8c2765da6fb40fc983c740f7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. --- synapse/storage/room.py | 45 ++++++++------------------------------------- synapse/storage/search.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 37 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9f373b47e0..0fcfb7f86d 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,33 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - txn.execute("RESET work_mem") - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f52f3c8592..205e8d0017 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -246,6 +246,41 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256kB'") + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. -- cgit 1.5.1 From bd25f9cf36ff86d1616853d88cebd2a4a83fa552 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:05:41 +0000 Subject: Clean up work_mem handling Add some comments and improve exception handling when twiddling work_mem for the search update --- synapse/storage/search.py | 52 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 11 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 205e8d0017..190751bade 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import sys from twisted.internet import defer from .background_updates import BackgroundUpdateStore @@ -256,21 +256,51 @@ class SearchStore(BackgroundUpdateStore): value (str): """ if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " (event_id, room_id, key, vector, stream_ordering, " + " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, + + # inserts to a GIN index are normally batched up into a pending + # list, and then all committed together once the list gets to a + # certain size. The trouble with that is that postgres (pre-9.5) + # uses work_mem to determine the length of the list, and work_mem + # is typically very large. + # + # We therefore reduce work_mem while we do the insert. + # + # (postgres 9.5 uses the separate gin_pending_list_limit setting, + # so doesn't suffer the same problem, but changing work_mem will + # be harmless) + + txn.execute("SET work_mem='256kB'") + try: + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) ) - ) - txn.execute("RESET work_mem") + except Exception: + # we need to reset work_mem, but doing so may throw a new + # exception and we want to preserve the original + t, v, tb = sys.exc_info() + try: + txn.execute("RESET work_mem") + except Exception as e: + logger.warn( + "exception resetting work_mem during exception " + "handling: %r", + e, + ) + raise t, v, tb + else: + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" -- cgit 1.5.1 From 80b8a28100e29e34bdc6226513575789310aa41f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 89 +++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 33 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 190751bade..eecf778516 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple import sys from twisted.internet import defer @@ -26,6 +27,11 @@ import ujson as json logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,29 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) - - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - txn.execute("RESET work_mem") + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -255,6 +257,26 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" @@ -262,6 +284,10 @@ class SearchStore(BackgroundUpdateStore): " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) # inserts to a GIN index are normally batched up into a pending # list, and then all committed together once the list gets to a @@ -277,14 +303,7 @@ class SearchStore(BackgroundUpdateStore): txn.execute("SET work_mem='256kB'") try: - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + txn.executemany(sql, args) except Exception: # we need to reset work_mem, but doing so may throw a new # exception and we want to preserve the original @@ -306,7 +325,11 @@ class SearchStore(BackgroundUpdateStore): "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") -- cgit 1.5.1 From c46e75d3d8311f378f234e3de4719d6fa5d380c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. --- synapse/storage/room.py | 43 ++++++++----------------------------------- synapse/storage/search.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 35 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cf2c4dae39..fff6652e05 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,31 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c636..4f38a587c8 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -242,6 +242,39 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. -- cgit 1.5.1 From 3c7b480ba33c68bfc4e98de57b6874c32011c8f4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 95 +++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 33 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 4f38a587c8..f1ac9ba0fd 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -13,19 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple +import logging +import re +import ujson as json + from twisted.internet import defer from .background_updates import BackgroundUpdateStore from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine -import logging -import re -import ujson as json - logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,25 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) - - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -251,26 +257,49 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) + + txn.executemany(sql, args) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") -- cgit 1.5.1 From ee6fb4cf8560534a9acc61b075c09dceeca83e85 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:23:36 +0000 Subject: Remove redundant return value from _calculate_state_delta we already have the state from _get_new_state_after_events, so returning it from _calculate_state_delta is just confusing. --- synapse/storage/events.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dd28c2efe3..2fead9eb0f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -342,8 +342,20 @@ class EventsStore(SQLBaseStore): # NB: Assumes that we are only persisting events for one room # at a time. + + # map room_id->list[event_ids] giving the new forward + # extremities in each room new_forward_extremeties = {} + + # map room_id->(type,state_key)->event_id tracking the full + # state in each room after adding these events current_state_for_room = {} + + # map room_id->(to_delete, to_insert) where each entry is + # a map (type,key)->event_id giving the state delta in each + # room + state_delta_for_room = {} + if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): # Work out the new "current state" for each room. @@ -393,11 +405,12 @@ class EventsStore(SQLBaseStore): ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: + current_state_for_room[room_id] = current_state delta = yield self._calculate_state_delta( room_id, current_state, ) if delta is not None: - current_state_for_room[room_id] = delta + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -405,7 +418,7 @@ class EventsStore(SQLBaseStore): events_and_contexts=chunk, backfilled=backfilled, delete_existing=delete_existing, - current_state_for_room=current_state_for_room, + state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) @@ -422,7 +435,7 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) - for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + for room_id, new_state in current_state_for_room.iteritems(): self.get_current_state_ids.prefill( (room_id, ), new_state ) @@ -586,10 +599,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + 2-tuple (to_delete, to_insert) where both are state dicts, i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. + to insert. """ existing_state = yield self.get_current_state_ids(room_id) @@ -610,7 +623,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert, current_state)) + defer.returnValue((to_delete, to_insert)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -670,7 +683,7 @@ class EventsStore(SQLBaseStore): @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, - delete_existing=False, current_state_for_room={}, + delete_existing=False, state_delta_for_room={}, new_forward_extremeties={}): """Insert some number of room events into the necessary database tables. @@ -686,7 +699,7 @@ class EventsStore(SQLBaseStore): delete_existing (bool): True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - current_state_for_room (dict[str, (list[str], list[str])]): + state_delta_for_room (dict[str, (list[str], list[str])]): The current-state delta for each room. For each room, a tuple (to_delete, to_insert), being a list of event ids to be removed from the current state, and a list of event ids to be added to @@ -698,7 +711,7 @@ class EventsStore(SQLBaseStore): """ max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering - self._update_current_state_txn(txn, current_state_for_room, max_stream_order) + self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) self._update_forward_extremities_txn( txn, @@ -764,7 +777,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert, _ = current_state_tuple + to_delete, to_insert = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], -- cgit 1.5.1 From 3d33eef6fcbba474664a9bccdcb8822c6f72ee8c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 14:31:24 +0000 Subject: Store state groups separately from events (#2784) * Split state group persist into seperate storage func * Add per database engine code for state group id gen * Move store_state_group to StateReadStore This allows other workers to use it, and so resolve state. * Hook up store_state_group * Fix tests * Rename _store_mult_state_groups_txn * Rename StateGroupReadStore * Remove redundant _have_persisted_state_group_txn * Update comments * Comment compute_event_context * Set start val for state_group_id_seq ... otherwise we try to recreate old state groups * Update comments * Don't store state for outliers * Update comment * Update docstring as state groups are ints --- synapse/events/snapshot.py | 4 +- synapse/handlers/federation.py | 24 ++- synapse/replication/slave/storage/events.py | 4 +- synapse/state.py | 56 +++++- synapse/storage/__init__.py | 1 - synapse/storage/engines/postgres.py | 6 + synapse/storage/engines/sqlite3.py | 19 ++ synapse/storage/events.py | 10 +- synapse/storage/schema/delta/47/state_group_seq.py | 37 ++++ synapse/storage/state.py | 196 +++++++++++---------- tests/replication/slave/storage/test_events.py | 4 +- tests/test_state.py | 154 +++++++++------- 12 files changed, 326 insertions(+), 189 deletions(-) create mode 100644 synapse/storage/schema/delta/47/state_group_seq.py (limited to 'synapse/storage') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e9a732ff03..87e3fe7b97 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -25,7 +25,9 @@ class EventContext(object): The current state map excluding the current event. (type, state_key) -> event_id - state_group (int): state group id + state_group (int|None): state group id, if the state has been stored + as a state group. This is usually only None if e.g. the event is + an outlier. rejected (bool|str): A rejection reason if the event was rejected, else False diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9b..643e813b1f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1831,8 +1831,8 @@ class FederationHandler(BaseHandler): current_state = set(e.event_id for e in auth_events.values()) different_auth = event_auth_events - current_state - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) if different_auth and not event.internal_metadata.is_outlier(): @@ -1913,8 +1913,8 @@ class FederationHandler(BaseHandler): # 4. Look at rejects and their proofs. # TODO. - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) try: @@ -1923,11 +1923,15 @@ class FederationHandler(BaseHandler): logger.warn("Failed auth resolution for %r because %s", event, e) raise e - def _update_context_for_auth_events(self, context, auth_events, + @defer.inlineCallbacks + def _update_context_for_auth_events(self, event, context, auth_events, event_key): - """Update the state_ids in an event context after auth event resolution + """Update the state_ids in an event context after auth event resolution, + storing the changes as a new state group. Args: + event (Event): The event we're handling the context for + context (synapse.events.snapshot.EventContext): event context to be updated @@ -1950,7 +1954,13 @@ class FederationHandler(BaseHandler): context.prev_state_ids.update({ k: a.event_id for k, a in auth_events.iteritems() }) - context.state_group = self.store.get_next_state_group() + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) @defer.inlineCallbacks def construct_auth_difference(self, local_auth, remote_auth): diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 29d7296b43..8acb5df0f3 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore -from synapse.storage.state import StateGroupReadStore +from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore @@ -37,7 +37,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupReadStore, BaseSlavedStore): +class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) diff --git a/synapse/state.py b/synapse/state.py index 273f9911ca..cc93bbcb6b 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -183,8 +183,15 @@ class StateHandler(object): def compute_event_context(self, event, old_state=None): """Build an EventContext structure for the event. + This works out what the current state should be for the event, and + generates a new state group if necessary. + Args: event (synapse.events.EventBase): + old_state (dict|None): The state at the event if it can't be + calculated from existing events. This is normally only specified + when receiving an event from federation where we don't have the + prev events for, e.g. when backfilling. Returns: synapse.events.snapshot.EventContext: """ @@ -208,15 +215,22 @@ class StateHandler(object): context.current_state_ids = {} context.prev_state_ids = {} context.prev_state_events = [] - context.state_group = self.store.get_next_state_group() + + # We don't store state for outliers, so we don't generate a state + # froup for it. + context.state_group = None + defer.returnValue(context) if old_state: + # We already have the state, so we don't need to calculate it. + # Let's just correctly fill out the context and create a + # new state group for it. + context = EventContext() context.prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } - context.state_group = self.store.get_next_state_group() if event.is_state(): key = (event.type, event.state_key) @@ -229,6 +243,14 @@ class StateHandler(object): else: context.current_state_ids = context.prev_state_ids + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=context.current_state_ids, + ) + context.prev_state_events = [] defer.returnValue(context) @@ -242,7 +264,8 @@ class StateHandler(object): context = EventContext() context.prev_state_ids = curr_state if event.is_state(): - context.state_group = self.store.get_next_state_group() + # If this is a state event then we need to create a new state + # group for the state after this event. key = (event.type, event.state_key) if key in context.prev_state_ids: @@ -253,23 +276,42 @@ class StateHandler(object): context.current_state_ids[key] = event.event_id if entry.state_group: + # If the state at the event has a state group assigned then + # we can use that as the prev group context.prev_group = entry.state_group context.delta_ids = { key: event.event_id } elif entry.prev_group: + # If the state at the event only has a prev group, then we can + # use that as a prev group too. context.prev_group = entry.prev_group context.delta_ids = dict(entry.delta_ids) context.delta_ids[key] = event.event_id + + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) else: + context.current_state_ids = context.prev_state_ids + context.prev_group = entry.prev_group + context.delta_ids = entry.delta_ids + if entry.state_group is None: - entry.state_group = self.store.get_next_state_group() + entry.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=entry.prev_group, + delta_ids=entry.delta_ids, + current_state_ids=context.current_state_ids, + ) entry.state_id = entry.state_group context.state_group = entry.state_group - context.current_state_ids = context.prev_state_ids - context.prev_group = entry.prev_group - context.delta_ids = entry.delta_ids context.prev_state_events = [] defer.returnValue(context) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d01d46338a..f8fbd02ceb 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -124,7 +124,6 @@ class DataStore(RoomMemberStore, RoomStore, ) self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id") - self._state_groups_id_gen = IdGenerator(db_conn, "state_groups", "id") self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index a6ae79dfad..8a0386c1a4 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -62,3 +62,9 @@ class PostgresEngine(object): def lock_table(self, txn, table): txn.execute("LOCK TABLE %s in EXCLUSIVE MODE" % (table,)) + + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + txn.execute("SELECT nextval('state_group_id_seq')") + return txn.fetchone()[0] diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py index 755c9a1f07..60f0fa7fb3 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite3.py @@ -16,6 +16,7 @@ from synapse.storage.prepare_database import prepare_database import struct +import threading class Sqlite3Engine(object): @@ -24,6 +25,11 @@ class Sqlite3Engine(object): def __init__(self, database_module, database_config): self.module = database_module + # The current max state_group, or None if we haven't looked + # in the DB yet. + self._current_state_group_id = None + self._current_state_group_id_lock = threading.Lock() + def check_database(self, txn): pass @@ -43,6 +49,19 @@ class Sqlite3Engine(object): def lock_table(self, txn, table): return + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + # We do application locking here since if we're using sqlite then + # we are a single process synapse. + with self._current_state_group_id_lock: + if self._current_state_group_id is None: + txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") + self._current_state_group_id = txn.fetchone()[0] + + self._current_state_group_id += 1 + return self._current_state_group_id + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2fead9eb0f..af56f1ee57 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -755,9 +755,8 @@ class EventsStore(SQLBaseStore): events_and_contexts=events_and_contexts, ) - # Insert into the state_groups, state_groups_state, and - # event_to_state_groups tables. - self._store_mult_state_groups_txn(txn, events_and_contexts) + # Insert into event_to_state_groups. + self._store_event_state_mappings_txn(txn, events_and_contexts) # _store_rejected_events_txn filters out any events which were # rejected, and returns the filtered list. @@ -992,10 +991,9 @@ class EventsStore(SQLBaseStore): # an outlier in the database. We now have some state at that # so we need to update the state_groups table with that state. - # insert into the state_group, state_groups_state and - # event_to_state_groups tables. + # insert into event_to_state_groups. try: - self._store_mult_state_groups_txn(txn, ((event, context),)) + self._store_event_state_mappings_txn(txn, ((event, context),)) except Exception: logger.exception("") raise diff --git a/synapse/storage/schema/delta/47/state_group_seq.py b/synapse/storage/schema/delta/47/state_group_seq.py new file mode 100644 index 0000000000..f6766501d2 --- /dev/null +++ b/synapse/storage/schema/delta/47/state_group_seq.py @@ -0,0 +1,37 @@ +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + # if we already have some state groups, we want to start making new + # ones with a higher id. + cur.execute("SELECT max(id) FROM state_groups") + row = cur.fetchone() + + if row[0] is None: + start_val = 1 + else: + start_val = row[0] + 1 + + cur.execute( + "CREATE SEQUENCE state_group_id_seq START WITH %s", + (start_val, ), + ) + + +def run_upgrade(*args, **kwargs): + pass diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 360e3e4355..adb48df73e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -42,11 +42,8 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt return len(self.delta_ids) if self.delta_ids else 0 -class StateGroupReadStore(SQLBaseStore): - """The read-only parts of StateGroupStore - - None of these functions write to the state tables, so are suitable for - including in the SlavedStores. +class StateGroupWorkerStore(SQLBaseStore): + """The parts of StateGroupStore that can be called from workers. """ STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" @@ -54,7 +51,7 @@ class StateGroupReadStore(SQLBaseStore): CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" def __init__(self, db_conn, hs): - super(StateGroupReadStore, self).__init__(db_conn, hs) + super(StateGroupWorkerStore, self).__init__(db_conn, hs) self._state_group_cache = DictionaryCache( "*stateGroupCache*", 100000 * CACHE_SIZE_FACTOR @@ -549,116 +546,66 @@ class StateGroupReadStore(SQLBaseStore): defer.returnValue(results) + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + """Store a new set of state, returning a newly assigned state group. -class StateStore(StateGroupReadStore, BackgroundUpdateStore): - """ Keeps track of the state at a given event. - - This is done by the concept of `state groups`. Every event is a assigned - a state group (identified by an arbitrary string), which references a - collection of state events. The current state of an event is then the - collection of state events referenced by the event's state group. - - Hence, every change in the current state causes a new state group to be - generated. However, if no change happens (e.g., if we get a message event - with only one parent it inherits the state group from its parent.) - - There are three tables: - * `state_groups`: Stores group name, first event with in the group and - room id. - * `event_to_state_groups`: Maps events to state groups. - * `state_groups_state`: Maps state group to state events. - """ - - STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" - STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" - CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" - - def __init__(self, db_conn, hs): - super(StateStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, - self._background_deduplicate_state, - ) - self.register_background_update_handler( - self.STATE_GROUP_INDEX_UPDATE_NAME, - self._background_index_state, - ) - self.register_background_index_update( - self.CURRENT_STATE_INDEX_UPDATE_NAME, - index_name="current_state_events_member_index", - table="current_state_events", - columns=["state_key"], - where_clause="type='m.room.member'", - ) - - def _have_persisted_state_group_txn(self, txn, state_group): - txn.execute( - "SELECT count(*) FROM state_groups WHERE id = ?", - (state_group,) - ) - row = txn.fetchone() - return row and row[0] - - def _store_mult_state_groups_txn(self, txn, events_and_contexts): - state_groups = {} - for event, context in events_and_contexts: - if event.internal_metadata.is_outlier(): - continue + Args: + event_id (str): The event ID for which the state was calculated + room_id (str) + prev_group (int|None): A previous state group for the room, optional. + delta_ids (dict|None): The delta between state at `prev_group` and + `current_state_ids`, if `prev_group` was given. Same format as + `current_state_ids`. + current_state_ids (dict): The state to store. Map of (type, state_key) + to event_id. - if context.current_state_ids is None: + Returns: + Deferred[int]: The state group ID + """ + def _store_state_group_txn(txn): + if current_state_ids is None: # AFAIK, this can never happen - logger.error( - "Non-outlier event %s had current_state_ids==None", - event.event_id) - continue + raise Exception("current_state_ids cannot be None") - # if the event was rejected, just give it the same state as its - # predecessor. - if context.rejected: - state_groups[event.event_id] = context.prev_group - continue - - state_groups[event.event_id] = context.state_group - - if self._have_persisted_state_group_txn(txn, context.state_group): - continue + state_group = self.database_engine.get_next_state_group_id(txn) self._simple_insert_txn( txn, table="state_groups", values={ - "id": context.state_group, - "room_id": event.room_id, - "event_id": event.event_id, + "id": state_group, + "room_id": room_id, + "event_id": event_id, }, ) # We persist as a delta if we can, while also ensuring the chain # of deltas isn't tooo long, as otherwise read performance degrades. - if context.prev_group: + if prev_group: is_in_db = self._simple_select_one_onecol_txn( txn, table="state_groups", - keyvalues={"id": context.prev_group}, + keyvalues={"id": prev_group}, retcol="id", allow_none=True, ) if not is_in_db: raise Exception( "Trying to persist state with unpersisted prev_group: %r" - % (context.prev_group,) + % (prev_group,) ) potential_hops = self._count_state_group_hops_txn( - txn, context.prev_group + txn, prev_group ) - if context.prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: self._simple_insert_txn( txn, table="state_group_edges", values={ - "state_group": context.state_group, - "prev_state_group": context.prev_group, + "state_group": state_group, + "prev_state_group": prev_group, }, ) @@ -667,13 +614,13 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): table="state_groups_state", values=[ { - "state_group": context.state_group, - "room_id": event.room_id, + "state_group": state_group, + "room_id": room_id, "type": key[0], "state_key": key[1], "event_id": state_id, } - for key, state_id in context.delta_ids.iteritems() + for key, state_id in delta_ids.iteritems() ], ) else: @@ -682,13 +629,13 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): table="state_groups_state", values=[ { - "state_group": context.state_group, - "room_id": event.room_id, + "state_group": state_group, + "room_id": room_id, "type": key[0], "state_key": key[1], "event_id": state_id, } - for key, state_id in context.current_state_ids.iteritems() + for key, state_id in current_state_ids.iteritems() ], ) @@ -699,11 +646,71 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, - key=context.state_group, - value=dict(context.current_state_ids), + key=state_group, + value=dict(current_state_ids), full=True, ) + return state_group + + return self.runInteraction("store_state_group", _store_state_group_txn) + + +class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): + """ Keeps track of the state at a given event. + + This is done by the concept of `state groups`. Every event is a assigned + a state group (identified by an arbitrary string), which references a + collection of state events. The current state of an event is then the + collection of state events referenced by the event's state group. + + Hence, every change in the current state causes a new state group to be + generated. However, if no change happens (e.g., if we get a message event + with only one parent it inherits the state group from its parent.) + + There are three tables: + * `state_groups`: Stores group name, first event with in the group and + room id. + * `event_to_state_groups`: Maps events to state groups. + * `state_groups_state`: Maps state group to state events. + """ + + STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" + STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" + CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" + + def __init__(self, db_conn, hs): + super(StateStore, self).__init__(db_conn, hs) + self.register_background_update_handler( + self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, + self._background_deduplicate_state, + ) + self.register_background_update_handler( + self.STATE_GROUP_INDEX_UPDATE_NAME, + self._background_index_state, + ) + self.register_background_index_update( + self.CURRENT_STATE_INDEX_UPDATE_NAME, + index_name="current_state_events_member_index", + table="current_state_events", + columns=["state_key"], + where_clause="type='m.room.member'", + ) + + def _store_event_state_mappings_txn(self, txn, events_and_contexts): + state_groups = {} + for event, context in events_and_contexts: + if event.internal_metadata.is_outlier(): + continue + + # if the event was rejected, just give it the same state as its + # predecessor. + if context.rejected: + state_groups[event.event_id] = context.prev_group + continue + + state_groups[event.event_id] = context.state_group + self._simple_insert_many_txn( txn, table="event_to_state_groups", @@ -763,9 +770,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): return count - def get_next_state_group(self): - return self._state_groups_id_gen.get_next() - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 105e1228bb..f430cce931 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -226,11 +226,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): context = EventContext() context.current_state_ids = state_ids context.prev_state_ids = state_ids - elif not backfill: + else: state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - else: - context = EventContext() context.push_actions = push_actions diff --git a/tests/test_state.py b/tests/test_state.py index d16e1b3b8b..a5c5e55951 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -80,14 +80,14 @@ class StateGroupStore(object): return defer.succeed(groups) - def store_state_groups(self, event, context): - if context.current_state_ids is None: - return + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + state_group = self._next_group + self._next_group += 1 - state_events = dict(context.current_state_ids) + self._group_to_state[state_group] = dict(current_state_ids) - self._group_to_state[context.state_group] = state_events - self._event_to_state_group[event.event_id] = context.state_group + return state_group def get_events(self, event_ids, **kwargs): return { @@ -95,10 +95,19 @@ class StateGroupStore(object): if e_id in self._event_id_to_event } + def get_state_group_delta(self, name): + return (None, None) + def register_events(self, events): for e in events: self._event_id_to_event[e.event_id] = e + def register_event_context(self, event, context): + self._event_to_state_group[event.event_id] = context.state_group + + def register_event_id_state_group(self, event_id, state_group): + self._event_to_state_group[event_id] = state_group + class DictObj(dict): def __init__(self, **kwargs): @@ -137,15 +146,7 @@ class Graph(object): class StateTestCase(unittest.TestCase): def setUp(self): - self.store = Mock( - spec_set=[ - "get_state_groups_ids", - "add_event_hashes", - "get_events", - "get_next_state_group", - "get_state_group_delta", - ] - ) + self.store = StateGroupStore() hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", "get_state_resolution_handler", @@ -156,9 +157,6 @@ class StateTestCase(unittest.TestCase): hs.get_auth.return_value = Auth(hs) hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) - self.store.get_next_state_group.side_effect = Mock - self.store.get_state_group_delta.return_value = (None, None) - self.state = StateHandler(hs) self.event_id = 0 @@ -197,14 +195,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertEqual(2, len(context_store["D"].prev_state_ids)) @@ -249,16 +246,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -315,16 +309,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -398,16 +389,13 @@ class StateTestCase(unittest.TestCase): self._add_depths(nodes, edges) graph = Graph(nodes, edges) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -467,7 +455,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_message(self): - event = create_event(type="test_message", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="test_message", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -475,11 +467,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -492,7 +484,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_state(self): - event = create_event(type="state", state_key="", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="state", state_key="", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -500,11 +496,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -517,7 +513,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_message_conflict(self): - event = create_event(type="test_message", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test_message", name="event3", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -537,12 +538,12 @@ class StateTestCase(unittest.TestCase): create_event(type="test4", state_key=""), ] - store = StateGroupStore() - store.register_events(old_state_1) - store.register_events(old_state_2) - self.store.get_events = store.get_events + self.store.register_events(old_state_1) + self.store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -550,7 +551,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_state_conflict(self): - event = create_event(type="test4", state_key="", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", state_key="", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -575,7 +581,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -583,7 +591,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_standard_depth_conflict(self): - event = create_event(type="test4", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) member_event = create_event( type=EventTypes.Member, @@ -615,7 +628,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_2[2].event_id, context.current_state_ids[("test1", "1")] @@ -639,19 +654,26 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_1) store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_1[2].event_id, context.current_state_ids[("test1", "1")] ) - def _get_context(self, event, old_state_1, old_state_2): - group_name_1 = "group_name_1" - group_name_2 = "group_name_2" + def _get_context(self, event, prev_event_id_1, old_state_1, prev_event_id_2, + old_state_2): + sg1 = self.store.store_state_group( + prev_event_id_1, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_1}, + ) + self.store.register_event_id_state_group(prev_event_id_1, sg1) - self.store.get_state_groups_ids.return_value = { - group_name_1: {(e.type, e.state_key): e.event_id for e in old_state_1}, - group_name_2: {(e.type, e.state_key): e.event_id for e in old_state_2}, - } + sg2 = self.store.store_state_group( + prev_event_id_2, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_2}, + ) + self.store.register_event_id_state_group(prev_event_id_2, sg2) return self.state.compute_event_context(event) -- cgit 1.5.1 From 671540dccf3996620ffe65705904fb911e21fb68 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:27:08 +0000 Subject: rename delete_old_state -> purge_history (beacause it deletes more than state) --- synapse/handlers/message.py | 2 +- synapse/storage/events.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd2..1c7860bb05 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.delete_old_state(room_id, depth) + yield self.store.purge_history(room_id, depth) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec90..21533970d1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,16 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def delete_old_state(self, room_id, topological_ordering): + def purge_history(self, room_id, topological_ordering): + """Deletes room history before a certain point + """ + return self.runInteraction( - "delete_old_state", - self._delete_old_state_txn, room_id, topological_ordering + "purge_history", + self._purge_history_txn, room_id, topological_ordering ) - def _delete_old_state_txn(self, txn, room_id, topological_ordering): - """Deletes old room state - """ - + def _purge_history_txn(self, txn, room_id, topological_ordering): # Tables that should be pruned: # event_auth # event_backward_extremities -- cgit 1.5.1 From 61ffaa8137ac962f84a077bb53c4a1b06b21b49b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:34:35 +0000 Subject: bump purge logging to info this thing takes ages and the only sign of any progress is the logs, so having some logs is useful. --- synapse/storage/events.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21533970d1..803a4e2477 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2081,7 +2081,7 @@ class EventsStore(SQLBaseStore): 400, "topological_ordering is greater than forward extremeties" ) - logger.debug("[purge] looking for events to delete") + logger.info("[purge] looking for events to delete") txn.execute( "SELECT event_id, state_key FROM events" @@ -2102,7 +2102,7 @@ class EventsStore(SQLBaseStore): for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.debug("[purge] Finding new backward extremities") + logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged @@ -2116,7 +2116,7 @@ class EventsStore(SQLBaseStore): ) new_backwards_extrems = txn.fetchall() - logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) + logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems) txn.execute( "DELETE FROM event_backward_extremities WHERE room_id = ?", @@ -2132,7 +2132,7 @@ class EventsStore(SQLBaseStore): ] ) - logger.debug("[purge] finding redundant state groups") + logger.info("[purge] finding redundant state groups") # Get all state groups that are only referenced by events that are # to be deleted. @@ -2149,15 +2149,15 @@ class EventsStore(SQLBaseStore): ) state_rows = txn.fetchall() - logger.debug("[purge] found %i redundant state groups", len(state_rows)) + logger.info("[purge] found %i redundant state groups", len(state_rows)) # make a set of the redundant state groups, so that we can look them up # efficiently state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups - logger.debug("[purge] finding state groups which depend on redundant" - " state groups") + logger.info("[purge] finding state groups which depend on redundant" + " state groups") remaining_state_groups = [] for i in xrange(0, len(state_rows), 100): chunk = [sg for sg, in state_rows[i:i + 100]] @@ -2182,7 +2182,7 @@ class EventsStore(SQLBaseStore): # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. for sg in remaining_state_groups: - logger.debug("[purge] de-delta-ing remaining state group %s", sg) + logger.info("[purge] de-delta-ing remaining state group %s", sg) curr_state = self._get_state_groups_from_groups_txn( txn, [sg], types=None ) @@ -2219,7 +2219,7 @@ class EventsStore(SQLBaseStore): ], ) - logger.debug("[purge] removing redundant state groups") + logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", state_rows @@ -2230,13 +2230,13 @@ class EventsStore(SQLBaseStore): ) # Delete all non-state - logger.debug("[purge] removing events from event_to_state_groups") + logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) - logger.debug("[purge] updating room_depth") + logger.info("[purge] updating room_depth") txn.execute( "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", (topological_ordering, room_id,) @@ -2258,7 +2258,8 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.debug("[purge] removing remote non-state events from %s", table) + logger.info("[purge] removing remote non-state events from %s", + table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -2266,7 +2267,7 @@ class EventsStore(SQLBaseStore): ) # Mark all state and own events as outliers - logger.debug("[purge] marking remaining events as outliers") + logger.info("[purge] marking remaining events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", -- cgit 1.5.1 From e571aef06d3b1af3946e790841f4b8a3a4cfdebf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:40:29 +0000 Subject: purge: Move cache invalidation to more appropriate place it was a bit of a non-sequitur there --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 803a4e2477..24d9978304 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2099,9 +2099,6 @@ class EventsStore(SQLBaseStore): "[purge] found %i events before cutoff, of which %i are remote" " non-state events to delete", len(event_rows), len(to_delete)) - for event_id, state_key in event_rows: - txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding @@ -2229,12 +2226,15 @@ class EventsStore(SQLBaseStore): state_rows ) - # Delete all non-state logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) + for event_id, _ in event_rows: + txn.call_after(self._get_state_group_for_event.invalidate, ( + event_id, + )) logger.info("[purge] updating room_depth") txn.execute( -- cgit 1.5.1 From 74fcbf741b3a7b95b5cc44478050e8a40fb7dc46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Feb 2018 18:44:52 +0000 Subject: delete_local_events for purge_history Add a flag which makes the purger delete local events --- docs/admin_api/purge_history_api.rst | 14 ++++++++++++-- synapse/handlers/message.py | 4 ++-- synapse/http/servlet.py | 18 +++++++++++++++--- synapse/rest/client/v1/admin.py | 11 ++++++++++- synapse/storage/events.py | 35 ++++++++++++++++++++++++++++------- 5 files changed, 67 insertions(+), 15 deletions(-) (limited to 'synapse/storage') diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index 08b3306366..b4e5bd9d75 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -4,8 +4,6 @@ Purge History API The purge history API allows server admins to purge historic events from their database, reclaiming disk space. -**NB!** This will not delete local events (locally sent messages content etc) from the database, but will remove lots of the metadata about them and does dramatically reduce the on disk space usage - Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. @@ -15,3 +13,15 @@ The API is simply: ``POST /_matrix/client/r0/admin/purge_history//`` including an ``access_token`` of a server admin. + +By default, events sent by local users are not deleted, as they may represent +the only copies of this content in existence. (Events sent by remote users are +deleted, and room state data before the cutoff is always removed). + +To delete local events as well, set ``delete_local_events`` in the body: + +.. code:: json + + { + "delete_local_events": True, + } diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c7860bb05..276d1a7722 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -63,7 +63,7 @@ class MessageHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() @defer.inlineCallbacks - def purge_history(self, room_id, event_id): + def purge_history(self, room_id, event_id, delete_local_events=False): event = yield self.store.get_event(event_id) if event.room_id != room_id: @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth) + yield self.store.purge_history(room_id, depth, delete_local_events) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 71420e54db..ef8e62901b 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False, return default -def parse_json_value_from_request(request): +def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into None Returns: The JSON value. @@ -165,6 +167,9 @@ def parse_json_value_from_request(request): except Exception: raise SynapseError(400, "Error reading JSON content.") + if not content_bytes and allow_empty_body: + return None + try: content = simplejson.loads(content_bytes) except Exception as e: @@ -174,17 +179,24 @@ def parse_json_value_from_request(request): return content -def parse_json_object_from_request(request): +def parse_json_object_from_request(request, allow_empty_body=False): """Parse a JSON object from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into an empty dict. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_value_from_request(request) + content = parse_json_value_from_request( + request, allow_empty_body=allow_empty_body, + ) + + if allow_empty_body and content is None: + return {} if type(content) != dict: message = "Content must be a JSON object." diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea9..f954d2ea65 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -128,7 +128,16 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - yield self.handlers.message_handler.purge_history(room_id, event_id) + body = parse_json_object_from_request(request, allow_empty_body=True) + + delete_local_events = bool( + body.get("delete_local_history", False) + ) + + yield self.handlers.message_handler.purge_history( + room_id, event_id, + delete_local_events=delete_local_events, + ) defer.returnValue((200, {})) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 24d9978304..11a2ff2d8a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,32 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def purge_history(self, room_id, topological_ordering): + def purge_history( + self, room_id, topological_ordering, delete_local_events, + ): """Deletes room history before a certain point + + Args: + room_id (str): + + topological_ordering (int): + minimum topo ordering to preserve + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). """ return self.runInteraction( "purge_history", - self._purge_history_txn, room_id, topological_ordering + self._purge_history_txn, room_id, topological_ordering, + delete_local_events, ) - def _purge_history_txn(self, txn, room_id, topological_ordering): + def _purge_history_txn( + self, txn, room_id, topological_ordering, delete_local_events, + ): # Tables that should be pruned: # event_auth # event_backward_extremities @@ -2093,11 +2109,14 @@ class EventsStore(SQLBaseStore): to_delete = [ (event_id,) for event_id, state_key in event_rows - if state_key is None and not self.hs.is_mine_id(event_id) + if state_key is None and ( + delete_local_events or not self.hs.is_mine_id(event_id) + ) ] logger.info( - "[purge] found %i events before cutoff, of which %i are remote" - " non-state events to delete", len(event_rows), len(to_delete)) + "[purge] found %i events before cutoff, of which %i can be deleted", + len(event_rows), len(to_delete), + ) logger.info("[purge] Finding new backward extremities") @@ -2273,7 +2292,9 @@ class EventsStore(SQLBaseStore): " WHERE event_id = ?", [ (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or self.hs.is_mine_id(event_id) + if state_key is not None or ( + not delete_local_events and self.hs.is_mine_id(event_id) + ) ] ) -- cgit 1.5.1 From 39a6b3549638c70e3aaf51b361576fbd729eb655 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Feb 2018 12:13:34 +0000 Subject: purge: move room_depth update to end ... to avoid locking the table for too long --- synapse/storage/events.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 11a2ff2d8a..238a2006b8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2255,12 +2255,6 @@ class EventsStore(SQLBaseStore): event_id, )) - logger.info("[purge] updating room_depth") - txn.execute( - "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", - (topological_ordering, room_id,) - ) - # Delete all remote non-state events for table in ( "events", @@ -2298,6 +2292,18 @@ class EventsStore(SQLBaseStore): ] ) + # synapse tries to take out an exclusive lock on room_depth whenever it + # persists events (because upsert), and once we run this update, we + # will block that for the rest of our transaction. + # + # So, let's stick it at the end so that we don't block event + # persistence. + logger.info("[purge] updating room_depth") + txn.execute( + "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", + (topological_ordering, room_id,) + ) + logger.info("[purge] done") @defer.inlineCallbacks -- cgit 1.5.1