From baafb85ba461b7d5073de94422fed0c43a417f46 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Thu, 11 May 2017 11:57:02 +0100
Subject: Add an index to event_search

- to make the purge API quicker
---
 synapse/storage/background_updates.py              | 10 +++++++---
 synapse/storage/events.py                          | 11 +++++++++++
 .../schema/delta/41/event_search_event_id_idx.sql  | 17 +++++++++++++++++
 3 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 synapse/storage/schema/delta/41/event_search_event_id_idx.sql

diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index d4cf0fc59b..12a8b8259c 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -210,7 +210,8 @@ class BackgroundUpdateStore(SQLBaseStore):
         self._background_update_handlers[update_name] = update_handler
 
     def register_background_index_update(self, update_name, index_name,
-                                         table, columns, where_clause=None):
+                                         table, columns, where_clause=None,
+                                         unique=False):
         """Helper for store classes to do a background index addition
 
         To use:
@@ -245,9 +246,11 @@ class BackgroundUpdateStore(SQLBaseStore):
             c.execute(sql)
 
             sql = (
-                "CREATE INDEX CONCURRENTLY %(name)s ON %(table)s"
+                "CREATE %(unique)s INDEX CONCURRENTLY %(name)s"
+                " ON %(table)s"
                 " (%(columns)s) %(where_clause)s"
             ) % {
+                "unique": "UNIQUE" if unique else "",
                 "name": index_name,
                 "table": table,
                 "columns": ", ".join(columns),
@@ -270,9 +273,10 @@ class BackgroundUpdateStore(SQLBaseStore):
             # down at the wrong moment - hence we use IF NOT EXISTS. (SQLite
             # has supported CREATE TABLE|INDEX IF NOT EXISTS since 3.3.0.)
             sql = (
-                "CREATE INDEX IF NOT EXISTS %(name)s ON %(table)s"
+                "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s ON %(table)s"
                 " (%(columns)s)"
             ) % {
+                "unique": "UNIQUE" if unique else "",
                 "name": index_name,
                 "table": table,
                 "columns": ", ".join(columns),
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index dbd63078c6..1fae1aeacf 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -207,6 +207,17 @@ class EventsStore(SQLBaseStore):
             where_clause="contains_url = true AND outlier = false",
         )
 
+        # an event_id index on event_search is useful for the purge_history
+        # api. Plus it means we get to enforce some integrity with a UNIQUE
+        # clause
+        self.register_background_index_update(
+            "event_search_event_id_idx",
+            index_name="event_search_event_id_idx",
+            table="event_search",
+            columns=["event_id"],
+            unique=True,
+        )
+
         self._event_persist_queue = _EventPeristenceQueue()
 
     def persist_events(self, events_and_contexts, backfilled=False):
diff --git a/synapse/storage/schema/delta/41/event_search_event_id_idx.sql b/synapse/storage/schema/delta/41/event_search_event_id_idx.sql
new file mode 100644
index 0000000000..5d9cfecf36
--- /dev/null
+++ b/synapse/storage/schema/delta/41/event_search_event_id_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+    VALUES ('event_search_event_id_idx', '{}');
--
cgit 1.4.1
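For illustration, the statement-building logic in the commit above can be sketched as a standalone function. This is a simplified paraphrase, not the real helper (which runs inside the background-update machinery and takes a live connection); the postgres flag stands in for the engine check, and the where_clause handling is inferred from the template rather than taken from the diff:

    def build_index_sql(index_name, table, columns, unique=False,
                        where_clause=None, postgres=True):
        # On postgres, CONCURRENTLY avoids holding a long write lock on the
        # table while the index is built. SQLite has no equivalent, so there
        # we rely on IF NOT EXISTS to make the update safe to re-run if the
        # server was restarted part-way through.
        if postgres:
            template = (
                "CREATE %(unique)s INDEX CONCURRENTLY %(name)s"
                " ON %(table)s (%(columns)s) %(where_clause)s"
            )
        else:
            template = (
                "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s"
                " ON %(table)s (%(columns)s)"
            )
        return template % {
            "unique": "UNIQUE" if unique else "",
            "name": index_name,
            "table": table,
            "columns": ", ".join(columns),
            "where_clause": "WHERE " + where_clause if where_clause else "",
        }

    # e.g. the registration added above would yield, on postgres:
    #   CREATE UNIQUE INDEX CONCURRENTLY event_search_event_id_idx
    #       ON event_search (event_id)
    print(build_index_sql("event_search_event_id_idx", "event_search",
                          ["event_id"], unique=True))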
From 114f2909479b4396f3da8cff651990f075b4bfba Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Thu, 11 May 2017 12:06:28 +0100
Subject: Add more logging for purging

Log the number of events we will be deleting at info.
---
 synapse/storage/events.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 1fae1aeacf..627e91a522 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -2033,6 +2033,8 @@ class EventsStore(SQLBaseStore):
                 400, "topological_ordering is greater than forward extremities"
             )
 
+        logger.debug("[purge] looking for events to delete")
+
         txn.execute(
             "SELECT event_id, state_key FROM events"
             " LEFT JOIN state_events USING (room_id, event_id)"
             " WHERE room_id = ? AND topological_ordering < ?",
             (room_id, topological_ordering,)
         )
         event_rows = txn.fetchall()
 
+        to_delete = [
+            (event_id,) for event_id, state_key in event_rows
+            if state_key is None and not self.hs.is_mine_id(event_id)
+        ]
+        logger.info(
+            "[purge] found %i events before cutoff, of which %i are remote"
+            " non-state events to delete", len(event_rows), len(to_delete))
+
         for event_id, state_key in event_rows:
             txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))
@@ -2091,6 +2101,7 @@ class EventsStore(SQLBaseStore):
         )
 
         state_rows = txn.fetchall()
+        logger.debug("[purge] found %i redundant state groups", len(state_rows))
 
         # make a set of the redundant state groups, so that we can look them up
        # efficiently
@@ -2184,10 +2195,6 @@ class EventsStore(SQLBaseStore):
         )
 
         # Delete all remote non-state events
-        to_delete = [
-            (event_id,) for event_id, state_key in event_rows
-            if state_key is None and not self.hs.is_mine_id(event_id)
-        ]
         for table in (
             "events",
             "event_json",
             "event_auth",
             "event_content_hashes",
             "event_destinations",
             "event_edge_hashes",
             "event_edges",
             "event_forward_extremities",
             "event_push_actions",
             "event_reference_hashes",
             "event_search",
             "event_signatures",
             "rejections",
         ):
-            logger.debug("[purge] removing non-state events from %s", table)
+            logger.debug("[purge] removing remote non-state events from %s", table)
 
             txn.executemany(
                 "DELETE FROM %s WHERE event_id = ?" % (table,),
                 to_delete
             )
 
         # Mark all state and own events as outliers
-        logger.debug("[purge] marking events as outliers")
+        logger.debug("[purge] marking remaining events as outliers")
         txn.executemany(
             "UPDATE events SET outlier = ?"
             " WHERE event_id = ?",
             [
                 (True, event_id,) for event_id, state_key in event_rows
                 if state_key is not None or self.hs.is_mine_id(event_id)
             ]
         )
 
-        logger.debug("[purge] done")
+        logger.info("[purge] done")
 
     @defer.inlineCallbacks
     def is_event_after(self, event_id1, event_id2):
--
cgit 1.4.1
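The batched deletes above use the DB-API executemany() pattern: one statement template, plus a list of one-element parameter tuples. A self-contained sketch of the same idiom against an in-memory SQLite database (the table layout and event IDs here are invented for the example, not Synapse's real schema):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE events (event_id TEXT, outlier INTEGER)")
    conn.executemany(
        "INSERT INTO events VALUES (?, ?)",
        [("$a:remote", 0), ("$b:remote", 0), ("$c:local", 0)],
    )

    # as in the purge code: one (event_id,) tuple per row to delete,
    # issued as a single batched statement per table
    to_delete = [("$a:remote",), ("$b:remote",)]
    conn.executemany("DELETE FROM events WHERE event_id = ?", to_delete)

    # events that are kept (state events and our own events) are marked
    # as outliers rather than deleted
    conn.executemany(
        "UPDATE events SET outlier = ? WHERE event_id = ?",
        [(1, "$c:local")],
    )
    print(conn.execute("SELECT event_id, outlier FROM events").fetchall())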
From 34194aaff7c86d57c6dabfb016b7597d999b2001 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Thu, 11 May 2017 12:46:55 +0100
Subject: Don't create event_search index on sqlite

... because the table is virtual
---
 synapse/storage/background_updates.py | 13 ++++++++++---
 synapse/storage/events.py             |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 12a8b8259c..7157fb1dfb 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -211,7 +211,8 @@ class BackgroundUpdateStore(SQLBaseStore):
 
     def register_background_index_update(self, update_name, index_name,
                                          table, columns, where_clause=None,
-                                         unique=False):
+                                         unique=False,
+                                         psql_only=False):
         """Helper for store classes to do a background index addition
 
         To use:
@@ -227,6 +228,9 @@ class BackgroundUpdateStore(SQLBaseStore):
             index_name (str): name of index to add
             table (str): table to add index to
             columns (list[str]): columns/expressions to include in index
+            unique (bool): true to make a UNIQUE index
+            psql_only: true to only create this index on psql databases (useful
+                for virtual sqlite tables)
         """
 
         def create_index_psql(conn):
@@ -288,13 +292,16 @@ class BackgroundUpdateStore(SQLBaseStore):
 
         if isinstance(self.database_engine, engines.PostgresEngine):
             runner = create_index_psql
+        elif psql_only:
+            runner = None
         else:
             runner = create_index_sqlite
 
         @defer.inlineCallbacks
         def updater(progress, batch_size):
-            logger.info("Adding index %s to %s", index_name, table)
-            yield self.runWithConnection(runner)
+            if runner is not None:
+                logger.info("Adding index %s to %s", index_name, table)
+                yield self.runWithConnection(runner)
             yield self._end_background_update(update_name)
             defer.returnValue(1)
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 627e91a522..ea6879c619 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -216,6 +216,7 @@ class EventsStore(SQLBaseStore):
             table="event_search",
             columns=["event_id"],
             unique=True,
+            psql_only=True,
         )
 
         self._event_persist_queue = _EventPeristenceQueue()
--
cgit 1.4.1

From ff3d810ea8e84a48508a08e6246c7d70739c94ea Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Thu, 11 May 2017 12:48:50 +0100
Subject: Add a comment to old delta
---
 synapse/storage/schema/delta/37/remove_auth_idx.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/storage/schema/delta/37/remove_auth_idx.py b/synapse/storage/schema/delta/37/remove_auth_idx.py
index 784f3b348f..20ad8bd5a6 100644
--- a/synapse/storage/schema/delta/37/remove_auth_idx.py
+++ b/synapse/storage/schema/delta/37/remove_auth_idx.py
@@ -36,6 +36,10 @@ DROP INDEX IF EXISTS transactions_have_ref;
 -- and is used incredibly rarely.
 DROP INDEX IF EXISTS events_order_topo_stream_room;
 
+-- an equivalent index to this actually gets re-created in delta 41, because it
+-- turned out that deleting it wasn't a great plan :/. In any case, let's
+-- delete it here, and delta 41 will create a new one with an added UNIQUE
+-- constraint
 DROP INDEX IF EXISTS event_search_ev_idx;
 """
--
cgit 1.4.1
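The psql_only flag is needed because, on SQLite, event_search is a full-text-search virtual table, and SQLite does not allow ordinary indexes on virtual tables. A quick standalone demonstration of the failure mode (this assumes an SQLite build with the usual FTS4 module compiled in):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE VIRTUAL TABLE event_search USING fts4(event_id, value)")
    try:
        conn.execute(
            "CREATE INDEX event_search_event_id_idx ON event_search (event_id)"
        )
    except sqlite3.OperationalError as e:
        # sqlite rejects this with an error along the lines of
        # "virtual tables may not be indexed"
        print("index creation failed as expected:", e)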
From 9da4316ca574f2d552136941d92ce722fabfe29e Mon Sep 17 00:00:00 2001
From: Pablo Saavedra
Date: Sat, 13 May 2017 18:17:54 +0200
Subject: Configurable maximum number of events requested by /sync and
 /messages (#2220)

Set the limit on the number of events returned in the timeline by the
get and sync operations. The default value is -1, which means no upper
limit.

For example, using `filter_timeline_limit: 5000`:

    POST /_matrix/client/r0/user/user:id/filter
    { room: { timeline: { limit: 1000000000000000000 } } }

    GET /_matrix/client/r0/user/user:id/filter/filter:id
    { room: { timeline: { limit: 5000 } } }

The server cuts down the room.timeline.limit.
---
 synapse/config/server.py               | 6 ++++++
 synapse/rest/client/v2_alpha/_base.py  | 8 ++++++++
 synapse/rest/client/v2_alpha/filter.py | 4 ++++
 synapse/rest/client/v2_alpha/sync.py   | 3 +++
 4 files changed, 21 insertions(+)

diff --git a/synapse/config/server.py b/synapse/config/server.py
index 25e6666238..3910b9dc31 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -35,6 +35,8 @@ class ServerConfig(Config):
         # "disable" federation
         self.send_federation = config.get("send_federation", True)
 
+        self.filter_timeline_limit = config.get("filter_timeline_limit", -1)
+
        if self.public_baseurl is not None:
             if self.public_baseurl[-1] != '/':
                 self.public_baseurl += '/'
@@ -161,6 +163,10 @@ class ServerConfig(Config):
         # The GC threshold parameters to pass to `gc.set_threshold`, if defined
         # gc_thresholds: [700, 10, 10]
 
+        # Set the limit on the returned events in the timeline in the get
+        # and sync operations. The default value is -1, which means no upper limit.
+        # filter_timeline_limit: 5000
+
         # List of ports that Synapse should listen on, their purpose and their
         # configuration.
         listeners:
diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py
index 20e765f48f..279e2a7751 100644
--- a/synapse/rest/client/v2_alpha/_base.py
+++ b/synapse/rest/client/v2_alpha/_base.py
@@ -47,3 +47,11 @@ def client_v2_patterns(path_regex, releases=(0,),
         new_prefix = CLIENT_V2_ALPHA_PREFIX.replace("/v2_alpha", "/r%d" % release)
         patterns.append(re.compile("^" + new_prefix + path_regex))
     return patterns
+
+
+def set_timeline_upper_limit(filter_json, filter_timeline_limit):
+    if filter_timeline_limit < 0:
+        return  # no upper limits
+    if 'room' in filter_json and 'limit' in filter_json['room']:
+        filter_json['room']["limit"] = min(filter_json['room']["limit"],
+                                           filter_timeline_limit)
diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py
index b4084fec62..364d20d8e6 100644
--- a/synapse/rest/client/v2_alpha/filter.py
+++ b/synapse/rest/client/v2_alpha/filter.py
@@ -20,6 +20,7 @@ from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.types import UserID
 
 from ._base import client_v2_patterns
+from ._base import set_timeline_upper_limit
 
 import logging
@@ -85,6 +86,9 @@ class CreateFilterRestServlet(RestServlet):
             raise AuthError(403, "Can only create filters for local users")
 
         content = parse_json_object_from_request(request)
+        set_timeline_upper_limit(content,
+                                 self.hs.config.filter_timeline_limit)
+
         filter_id = yield self.filtering.add_user_filter(
             user_localpart=target_user.localpart,
             user_filter=content,
diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index f30eab76fd..f5e7349c5c 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -28,6 +28,7 @@ from synapse.api.filtering import FilterCollection, DEFAULT_FILTER_COLLECTION
 from synapse.api.errors import SynapseError
 from synapse.api.constants import PresenceState
 from ._base import client_v2_patterns
+from ._base import set_timeline_upper_limit
 
 import itertools
 import logging
@@ -121,6 +122,8 @@ class SyncRestServlet(RestServlet):
         if filter_id.startswith('{'):
             try:
                 filter_object = json.loads(filter_id)
+                set_timeline_upper_limit(filter_object,
+                                         self.hs.config.filter_timeline_limit)
             except:
                 raise SynapseError(400, "Invalid filter JSON")
             self.filtering.check_valid_filter(filter_object)
--
cgit 1.4.1
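Note the shape of the data being clamped here: in a client filter, the timeline limit is nested under room.timeline rather than directly under room, so the check in this first version of set_timeline_upper_limit never matches; the two follow-up commits below correct exactly that. A hypothetical filter body illustrating the nesting:

    # the timeline limit lives two levels down, under room -> timeline,
    # which is why 'limit' in filter_json['room'] misses it
    filter_json = {
        "room": {
            "timeline": {"limit": 1000000000000000000},
        }
    }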
From 627e6ea2b0b941c67f8752736993f82a5f123e76 Mon Sep 17 00:00:00 2001
From: Pablo Saavedra
Date: Mon, 15 May 2017 14:51:43 +0200
Subject: Fixed implementation errors

* Added HS as property in SyncRestServlet
* Fixed set_timeline_upper_limit function implementation
---
 synapse/rest/client/v2_alpha/_base.py | 9 ++++++---
 synapse/rest/client/v2_alpha/sync.py  | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py
index 279e2a7751..fd8f915655 100644
--- a/synapse/rest/client/v2_alpha/_base.py
+++ b/synapse/rest/client/v2_alpha/_base.py
@@ -52,6 +52,9 @@ def client_v2_patterns(path_regex, releases=(0,),
 def set_timeline_upper_limit(filter_json, filter_timeline_limit):
     if filter_timeline_limit < 0:
         return  # no upper limits
-    if 'room' in filter_json and 'limit' in filter_json['room']:
-        filter_json['room']["limit"] = min(filter_json['room']["limit"],
-                                           filter_timeline_limit)
+    if 'room' in filter_json \
+            and 'timeline' in filter_json['room'] \
+            and 'limit' in filter_json['room']['timeline']:
+        filter_json['room']['timeline']["limit"] = min(
+            filter_json['room']['timeline']['limit'],
+            filter_timeline_limit)
diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index f5e7349c5c..771e127ab9 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -79,6 +79,7 @@ class SyncRestServlet(RestServlet):
 
     def __init__(self, hs):
         super(SyncRestServlet, self).__init__()
+        self.hs = hs
         self.auth = hs.get_auth()
         self.sync_handler = hs.get_sync_handler()
         self.clock = hs.get_clock()
--
cgit 1.4.1

From 224137fcf98b424c6b5d1ab4b76c24b6213c7174 Mon Sep 17 00:00:00 2001
From: Pablo Saavedra
Date: Mon, 15 May 2017 16:21:02 +0200
Subject: Fixed syntax nits
---
 synapse/rest/client/v2_alpha/_base.py  | 5 ++---
 synapse/rest/client/v2_alpha/filter.py | 6 ++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py
index fd8f915655..1f5bc24cc3 100644
--- a/synapse/rest/client/v2_alpha/_base.py
+++ b/synapse/rest/client/v2_alpha/_base.py
@@ -52,9 +52,8 @@ def client_v2_patterns(path_regex, releases=(0,),
 def set_timeline_upper_limit(filter_json, filter_timeline_limit):
     if filter_timeline_limit < 0:
         return  # no upper limits
-    if 'room' in filter_json \
-            and 'timeline' in filter_json['room'] \
-            and 'limit' in filter_json['room']['timeline']:
+    timeline = filter_json.get('room', {}).get('timeline', {})
+    if 'limit' in timeline:
         filter_json['room']['timeline']["limit"] = min(
             filter_json['room']['timeline']['limit'],
             filter_timeline_limit)
diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py
index 364d20d8e6..d2b2fd66e6 100644
--- a/synapse/rest/client/v2_alpha/filter.py
+++ b/synapse/rest/client/v2_alpha/filter.py
@@ -86,8 +86,10 @@ class CreateFilterRestServlet(RestServlet):
             raise AuthError(403, "Can only create filters for local users")
 
         content = parse_json_object_from_request(request)
-        set_timeline_upper_limit(content,
-                                 self.hs.config.filter_timeline_limit)
+        set_timeline_upper_limit(
+            content,
+            self.hs.config.filter_timeline_limit
+        )
 
         filter_id = yield self.filtering.add_user_filter(
             user_localpart=target_user.localpart,
             user_filter=content,
--
cgit 1.4.1
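After these two fixes, the helper clamps via a safe .get() chain and leaves filters with no timeline limit untouched. A quick usage sketch of the final version (the function body is copied from the diff above; the sample filters are made up):

    def set_timeline_upper_limit(filter_json, filter_timeline_limit):
        # final form of the helper from the commits above
        if filter_timeline_limit < 0:
            return  # no upper limits
        timeline = filter_json.get('room', {}).get('timeline', {})
        if 'limit' in timeline:
            filter_json['room']['timeline']["limit"] = min(
                filter_json['room']['timeline']['limit'],
                filter_timeline_limit)

    f = {"room": {"timeline": {"limit": 10 ** 18}}}
    set_timeline_upper_limit(f, 5000)
    print(f)  # {'room': {'timeline': {'limit': 5000}}}

    f = {"room": {}}
    set_timeline_upper_limit(f, 5000)  # no timeline section: left as-is
    print(f)  # {'room': {}}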
From d12ae7fd1c213c2505e3904aab98604fa86f42f5 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 15 May 2017 15:42:18 +0100
Subject: Don't log exceptions for NotRetryingDestination
---
 synapse/rest/media/v1/media_repository.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index c43b185e08..caca96c222 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -34,6 +34,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \
 from synapse.util.async import Linearizer
 from synapse.util.stringutils import is_ascii
 from synapse.util.logcontext import preserve_context_over_fn
+from synapse.util.retryutils import NotRetryingDestination
 
 import os
 import errno
@@ -181,7 +182,8 @@ class MediaRepository(object):
                 logger.exception("Failed to fetch remote media %s/%s",
                                  server_name, media_id)
                 raise
-
+            except NotRetryingDestination:
+                logger.warn("Not retrying destination %r", server_name)
             except Exception:
                 logger.exception("Failed to fetch remote media %s/%s",
                                  server_name, media_id)
--
cgit 1.4.1
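The placement of the new clause matters: Python checks except clauses in order, so NotRetryingDestination must appear before the catch-all except Exception, otherwise the quiet-warning branch would never be reached. A minimal sketch of the pattern (the exception class and fetch function here are stand-ins, not the real Synapse code):

    class NotRetryingDestination(Exception):
        """Stand-in for synapse.util.retryutils.NotRetryingDestination."""

    def fetch_remote_media():
        raise NotRetryingDestination("destination is backing off")

    try:
        fetch_remote_media()
    except NotRetryingDestination as e:
        # expected, transient condition: a quiet warning, no stack trace
        print("not retrying destination:", e)
    except Exception:
        # genuine failures still get logged with a full traceback
        import traceback
        traceback.print_exc()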