From b43d9a920b434180b0ae12516b19d09011f37c59 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 4 Apr 2019 18:54:03 +0100 Subject: Fix docstring on get_server_keys_json --- synapse/storage/keys.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 030cd1e5a3..f24ab3eedd 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -188,8 +188,8 @@ class KeyStore(SQLBaseStore): Args: server_keys (list): List of (server_name, key_id, source) triplets. Returns: - Dict mapping (server_name, key_id, source) triplets to dicts with - "ts_valid_until_ms" and "key_json" keys. + Deferred[dict[Tuple[str, str, str|None], list[dict]]]: + Dict mapping (server_name, key_id, source) triplets to lists of dicts """ def _get_server_keys_json_txn(txn): -- cgit 1.5.1 From 2d951686a71fd0c8b927d96fa183747ff2982bf9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 8 Apr 2019 15:37:26 +0100 Subject: drop tables listed in #1830 (#4992) Tables dropped: * application_services, * application_services_regex, * transaction_id_to_pdu, * stats_reporting * current_state_resets * event_content_hashes * event_destinations * event_edge_hashes * event_signatures * feedback * room_hosts * state_forward_extremities --- changelog.d/4992.misc | 1 + scripts/synapse_port_db | 5 - synapse/storage/events.py | 12 -- synapse/storage/schema/delta/13/v13.sql | 20 +--- .../schema/delta/14/upgrade_appservice_db.py | 42 ------- .../storage/schema/delta/16/unique_constraints.sql | 8 -- .../storage/schema/delta/24/stats_reporting.sql | 12 +- synapse/storage/schema/delta/30/state_stream.sql | 9 +- synapse/storage/schema/delta/32/remove_indices.sql | 4 - .../storage/schema/delta/54/drop_legacy_tables.sql | 30 +++++ .../storage/schema/full_schemas/11/event_edges.sql | 91 --------------- .../schema/full_schemas/11/event_signatures.sql | 55 --------- synapse/storage/schema/full_schemas/11/im.sql | 123 --------------------- synapse/storage/schema/full_schemas/11/keys.sql | 31 ------ .../schema/full_schemas/11/media_repository.sql | 65 ----------- .../storage/schema/full_schemas/11/presence.sql | 35 ------ .../storage/schema/full_schemas/11/profiles.sql | 19 ---- .../storage/schema/full_schemas/11/redactions.sql | 22 ---- .../schema/full_schemas/11/room_aliases.sql | 24 ---- synapse/storage/schema/full_schemas/11/state.sql | 40 ------- .../schema/full_schemas/11/transactions.sql | 44 -------- synapse/storage/schema/full_schemas/11/users.sql | 43 ------- .../full_schemas/16/application_services.sql | 19 +--- .../storage/schema/full_schemas/16/event_edges.sql | 30 +---- .../schema/full_schemas/16/event_signatures.sql | 23 +--- synapse/storage/schema/full_schemas/16/im.sql | 21 +--- .../storage/schema/full_schemas/16/presence.sql | 2 +- 27 files changed, 58 insertions(+), 772 deletions(-) create mode 100644 changelog.d/4992.misc delete mode 100644 synapse/storage/schema/delta/14/upgrade_appservice_db.py create mode 100644 synapse/storage/schema/delta/54/drop_legacy_tables.sql delete mode 100644 synapse/storage/schema/full_schemas/11/event_edges.sql delete mode 100644 synapse/storage/schema/full_schemas/11/event_signatures.sql delete mode 100644 synapse/storage/schema/full_schemas/11/im.sql delete mode 100644 synapse/storage/schema/full_schemas/11/keys.sql delete mode 100644 synapse/storage/schema/full_schemas/11/media_repository.sql delete mode 100644 synapse/storage/schema/full_schemas/11/presence.sql delete mode 100644 synapse/storage/schema/full_schemas/11/profiles.sql delete mode 100644 synapse/storage/schema/full_schemas/11/redactions.sql delete mode 100644 synapse/storage/schema/full_schemas/11/room_aliases.sql delete mode 100644 synapse/storage/schema/full_schemas/11/state.sql delete mode 100644 synapse/storage/schema/full_schemas/11/transactions.sql delete mode 100644 synapse/storage/schema/full_schemas/11/users.sql (limited to 'synapse/storage') diff --git a/changelog.d/4992.misc b/changelog.d/4992.misc new file mode 100644 index 0000000000..8a9eaea4cf --- /dev/null +++ b/changelog.d/4992.misc @@ -0,0 +1 @@ +Remove legacy tables detailed in #1830. diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 3de394b035..41be9c9220 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -58,15 +58,11 @@ BOOLEAN_COLUMNS = { APPEND_ONLY_TABLES = [ - "event_content_hashes", "event_reference_hashes", - "event_signatures", - "event_edge_hashes", "events", "event_json", "state_events", "room_memberships", - "feedback", "topics", "room_names", "rooms", @@ -88,7 +84,6 @@ APPEND_ONLY_TABLES = [ "event_search", "presence_stream", "push_rules_stream", - "current_state_resets", "ex_outlier_stream", "cache_invalidation_stream", "public_room_list_stream", diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dfda39bbe0..7a7f841c6c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1179,14 +1179,10 @@ class EventsStore( "events", "event_auth", "event_json", - "event_content_hashes", - "event_destinations", - "event_edge_hashes", "event_edges", "event_forward_extremities", "event_reference_hashes", "event_search", - "event_signatures", "event_to_state_groups", "guest_access", "history_visibility", @@ -1857,16 +1853,12 @@ class EventsStore( # Tables that should be pruned: # event_auth # event_backward_extremities - # event_content_hashes - # event_destinations - # event_edge_hashes # event_edges # event_forward_extremities # event_json # event_push_actions # event_reference_hashes # event_search - # event_signatures # event_to_state_groups # events # rejections @@ -2065,14 +2057,10 @@ class EventsStore( "events", "event_json", "event_auth", - "event_content_hashes", - "event_destinations", - "event_edge_hashes", "event_edges", "event_forward_extremities", "event_reference_hashes", "event_search", - "event_signatures", "rejections", ): logger.info("[purge] removing events from %s", table) diff --git a/synapse/storage/schema/delta/13/v13.sql b/synapse/storage/schema/delta/13/v13.sql index 5eb93b38b2..f8649e5d99 100644 --- a/synapse/storage/schema/delta/13/v13.sql +++ b/synapse/storage/schema/delta/13/v13.sql @@ -13,19 +13,7 @@ * limitations under the License. */ -CREATE TABLE IF NOT EXISTS application_services( - id INTEGER PRIMARY KEY AUTOINCREMENT, - url TEXT, - token TEXT, - hs_token TEXT, - sender TEXT, - UNIQUE(token) -); - -CREATE TABLE IF NOT EXISTS application_services_regex( - id INTEGER PRIMARY KEY AUTOINCREMENT, - as_id BIGINT UNSIGNED NOT NULL, - namespace INTEGER, /* enum[room_id|room_alias|user_id] */ - regex TEXT, - FOREIGN KEY(as_id) REFERENCES application_services(id) -); +/* We used to create a tables called application_services and + * application_services_regex, but these are no longer used and are removed in + * delta 54. + */ diff --git a/synapse/storage/schema/delta/14/upgrade_appservice_db.py b/synapse/storage/schema/delta/14/upgrade_appservice_db.py deleted file mode 100644 index 4d725b92fe..0000000000 --- a/synapse/storage/schema/delta/14/upgrade_appservice_db.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging - -import simplejson as json - -logger = logging.getLogger(__name__) - - -def run_create(cur, *args, **kwargs): - cur.execute("SELECT id, regex FROM application_services_regex") - for row in cur.fetchall(): - try: - logger.debug("Checking %s..." % row[0]) - json.loads(row[1]) - except ValueError: - # row isn't in json, make it so. - string_regex = row[1] - new_regex = json.dumps({ - "regex": string_regex, - "exclusive": True - }) - cur.execute( - "UPDATE application_services_regex SET regex=? WHERE id=?", - (new_regex, row[0]) - ) - - -def run_upgrade(*args, **kwargs): - pass diff --git a/synapse/storage/schema/delta/16/unique_constraints.sql b/synapse/storage/schema/delta/16/unique_constraints.sql index fecf11118c..5b8de52c33 100644 --- a/synapse/storage/schema/delta/16/unique_constraints.sql +++ b/synapse/storage/schema/delta/16/unique_constraints.sql @@ -17,14 +17,6 @@ DELETE FROM room_memberships WHERE rowid not in ( DROP INDEX IF EXISTS room_memberships_event_id; CREATE UNIQUE INDEX room_memberships_event_id ON room_memberships(event_id); --- -DELETE FROM feedback WHERE rowid not in ( - SELECT MIN(rowid) FROM feedback GROUP BY event_id -); - -DROP INDEX IF EXISTS feedback_event_id; -CREATE UNIQUE INDEX feedback_event_id ON feedback(event_id); - -- DELETE FROM topics WHERE rowid not in ( SELECT MIN(rowid) FROM topics GROUP BY event_id diff --git a/synapse/storage/schema/delta/24/stats_reporting.sql b/synapse/storage/schema/delta/24/stats_reporting.sql index 5f508af7a9..acea7483bd 100644 --- a/synapse/storage/schema/delta/24/stats_reporting.sql +++ b/synapse/storage/schema/delta/24/stats_reporting.sql @@ -1,4 +1,4 @@ -/* Copyright 2015, 2016 OpenMarket Ltd +/* Copyright 2019 New Vector Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,10 +13,6 @@ * limitations under the License. */ --- Should only ever contain one row -CREATE TABLE IF NOT EXISTS stats_reporting( - -- The stream ordering token which was most recently reported as stats - reported_stream_token INTEGER, - -- The time (seconds since epoch) stats were most recently reported - reported_time BIGINT -); + /* We used to create a table called stats_reporting, but this is no longer + * used and is removed in delta 54. + */ \ No newline at end of file diff --git a/synapse/storage/schema/delta/30/state_stream.sql b/synapse/storage/schema/delta/30/state_stream.sql index 706fe1dcf4..e85699e82e 100644 --- a/synapse/storage/schema/delta/30/state_stream.sql +++ b/synapse/storage/schema/delta/30/state_stream.sql @@ -14,15 +14,10 @@ */ -/** - * The positions in the event stream_ordering when the current_state was - * replaced by the state at the event. +/* We used to create a table called current_state_resets, but this is no + * longer used and is removed in delta 54. */ -CREATE TABLE IF NOT EXISTS current_state_resets( - event_stream_ordering BIGINT PRIMARY KEY NOT NULL -); - /* The outlier events that have aquired a state group typically through * backfill. This is tracked separately to the events table, as assigning a * state group change the position of the existing event in the stream diff --git a/synapse/storage/schema/delta/32/remove_indices.sql b/synapse/storage/schema/delta/32/remove_indices.sql index f859be46a6..4219cdd06a 100644 --- a/synapse/storage/schema/delta/32/remove_indices.sql +++ b/synapse/storage/schema/delta/32/remove_indices.sql @@ -24,13 +24,9 @@ DROP INDEX IF EXISTS state_groups_id; -- Duplicate of PRIMARY KEY DROP INDEX IF EXISTS event_to_state_groups_id; -- Duplicate of PRIMARY KEY DROP INDEX IF EXISTS event_push_actions_room_id_event_id_user_id_profile_tag; -- Duplicate of UNIQUE CONSTRAINT -DROP INDEX IF EXISTS event_destinations_id; -- Prefix of UNIQUE CONSTRAINT DROP INDEX IF EXISTS st_extrem_id; -- Prefix of UNIQUE CONSTRAINT -DROP INDEX IF EXISTS event_content_hashes_id; -- Prefix of UNIQUE CONSTRAINT DROP INDEX IF EXISTS event_signatures_id; -- Prefix of UNIQUE CONSTRAINT -DROP INDEX IF EXISTS event_edge_hashes_id; -- Prefix of UNIQUE CONSTRAINT DROP INDEX IF EXISTS redactions_event_id; -- Duplicate of UNIQUE CONSTRAINT -DROP INDEX IF EXISTS room_hosts_room_id; -- Prefix of UNIQUE CONSTRAINT -- The following indices were unused DROP INDEX IF EXISTS remote_media_cache_thumbnails_media_id; diff --git a/synapse/storage/schema/delta/54/drop_legacy_tables.sql b/synapse/storage/schema/delta/54/drop_legacy_tables.sql new file mode 100644 index 0000000000..77b39dc2d2 --- /dev/null +++ b/synapse/storage/schema/delta/54/drop_legacy_tables.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +DROP TABLE IF EXISTS application_services; +DROP TABLE IF EXISTS application_services_regex; +DROP TABLE IF EXISTS transaction_id_to_pdu; +DROP TABLE IF EXISTS stats_reporting; +DROP TABLE IF EXISTS current_state_resets; +DROP TABLE IF EXISTS event_content_hashes; +DROP TABLE IF EXISTS event_destinations; +DROP TABLE IF EXISTS event_edge_hashes; +DROP TABLE IF EXISTS event_signatures; +DROP TABLE IF EXISTS feedback; +DROP TABLE IF EXISTS room_hosts; +DROP TABLE IF EXISTS state_forward_extremities; + + + diff --git a/synapse/storage/schema/full_schemas/11/event_edges.sql b/synapse/storage/schema/full_schemas/11/event_edges.sql deleted file mode 100644 index bccd1c6f74..0000000000 --- a/synapse/storage/schema/full_schemas/11/event_edges.sql +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE TABLE IF NOT EXISTS event_forward_extremities( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - UNIQUE (event_id, room_id) -); - -CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id); -CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id); - - -CREATE TABLE IF NOT EXISTS event_backward_extremities( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - UNIQUE (event_id, room_id) -); - -CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id); -CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id); - - -CREATE TABLE IF NOT EXISTS event_edges( - event_id TEXT NOT NULL, - prev_event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - -- We no longer insert prev_state into this table, so all new rows will have - -- is_state as false. - is_state BOOL NOT NULL, - UNIQUE (event_id, prev_event_id, room_id, is_state) -); - -CREATE INDEX ev_edges_id ON event_edges(event_id); -CREATE INDEX ev_edges_prev_id ON event_edges(prev_event_id); - - -CREATE TABLE IF NOT EXISTS room_depth( - room_id TEXT NOT NULL, - min_depth INTEGER NOT NULL, - UNIQUE (room_id) -); - -CREATE INDEX room_depth_room ON room_depth(room_id); - - -create TABLE IF NOT EXISTS event_destinations( - event_id TEXT NOT NULL, - destination TEXT NOT NULL, - delivered_ts BIGINT DEFAULT 0, -- or 0 if not delivered - UNIQUE (event_id, destination) -); - -CREATE INDEX event_destinations_id ON event_destinations(event_id); - - -CREATE TABLE IF NOT EXISTS state_forward_extremities( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - type TEXT NOT NULL, - state_key TEXT NOT NULL, - UNIQUE (event_id, room_id) -); - -CREATE INDEX st_extrem_keys ON state_forward_extremities( - room_id, type, state_key -); -CREATE INDEX st_extrem_id ON state_forward_extremities(event_id); - - -CREATE TABLE IF NOT EXISTS event_auth( - event_id TEXT NOT NULL, - auth_id TEXT NOT NULL, - room_id TEXT NOT NULL, - UNIQUE (event_id, auth_id, room_id) -); - -CREATE INDEX evauth_edges_id ON event_auth(event_id); -CREATE INDEX evauth_edges_auth_id ON event_auth(auth_id); diff --git a/synapse/storage/schema/full_schemas/11/event_signatures.sql b/synapse/storage/schema/full_schemas/11/event_signatures.sql deleted file mode 100644 index 00ce85980e..0000000000 --- a/synapse/storage/schema/full_schemas/11/event_signatures.sql +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE TABLE IF NOT EXISTS event_content_hashes ( - event_id TEXT, - algorithm TEXT, - hash bytea, - UNIQUE (event_id, algorithm) -); - -CREATE INDEX event_content_hashes_id ON event_content_hashes(event_id); - - -CREATE TABLE IF NOT EXISTS event_reference_hashes ( - event_id TEXT, - algorithm TEXT, - hash bytea, - UNIQUE (event_id, algorithm) -); - -CREATE INDEX event_reference_hashes_id ON event_reference_hashes(event_id); - - -CREATE TABLE IF NOT EXISTS event_signatures ( - event_id TEXT, - signature_name TEXT, - key_id TEXT, - signature bytea, - UNIQUE (event_id, signature_name, key_id) -); - -CREATE INDEX event_signatures_id ON event_signatures(event_id); - - -CREATE TABLE IF NOT EXISTS event_edge_hashes( - event_id TEXT, - prev_event_id TEXT, - algorithm TEXT, - hash bytea, - UNIQUE (event_id, prev_event_id, algorithm) -); - -CREATE INDEX event_edge_hashes_id ON event_edge_hashes(event_id); diff --git a/synapse/storage/schema/full_schemas/11/im.sql b/synapse/storage/schema/full_schemas/11/im.sql deleted file mode 100644 index dfbbf9fd54..0000000000 --- a/synapse/storage/schema/full_schemas/11/im.sql +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE TABLE IF NOT EXISTS events( - stream_ordering INTEGER PRIMARY KEY AUTOINCREMENT, - topological_ordering BIGINT NOT NULL, - event_id TEXT NOT NULL, - type TEXT NOT NULL, - room_id TEXT NOT NULL, - content TEXT NOT NULL, - unrecognized_keys TEXT, - processed BOOL NOT NULL, - outlier BOOL NOT NULL, - depth BIGINT DEFAULT 0 NOT NULL, - UNIQUE (event_id) -); - -CREATE INDEX events_stream_ordering ON events (stream_ordering); -CREATE INDEX events_topological_ordering ON events (topological_ordering); -CREATE INDEX events_room_id ON events (room_id); - - -CREATE TABLE IF NOT EXISTS event_json( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - internal_metadata TEXT NOT NULL, - json TEXT NOT NULL, - UNIQUE (event_id) -); - -CREATE INDEX event_json_room_id ON event_json(room_id); - - -CREATE TABLE IF NOT EXISTS state_events( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - type TEXT NOT NULL, - state_key TEXT NOT NULL, - prev_state TEXT, - UNIQUE (event_id) -); - -CREATE INDEX state_events_room_id ON state_events (room_id); -CREATE INDEX state_events_type ON state_events (type); -CREATE INDEX state_events_state_key ON state_events (state_key); - - -CREATE TABLE IF NOT EXISTS current_state_events( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - type TEXT NOT NULL, - state_key TEXT NOT NULL, - UNIQUE (room_id, type, state_key) -); - -CREATE INDEX curr_events_event_id ON current_state_events (event_id); -CREATE INDEX current_state_events_room_id ON current_state_events (room_id); -CREATE INDEX current_state_events_type ON current_state_events (type); -CREATE INDEX current_state_events_state_key ON current_state_events (state_key); - -CREATE TABLE IF NOT EXISTS room_memberships( - event_id TEXT NOT NULL, - user_id TEXT NOT NULL, - sender TEXT NOT NULL, - room_id TEXT NOT NULL, - membership TEXT NOT NULL -); - -CREATE INDEX room_memberships_event_id ON room_memberships (event_id); -CREATE INDEX room_memberships_room_id ON room_memberships (room_id); -CREATE INDEX room_memberships_user_id ON room_memberships (user_id); - -CREATE TABLE IF NOT EXISTS feedback( - event_id TEXT NOT NULL, - feedback_type TEXT, - target_event_id TEXT, - sender TEXT, - room_id TEXT -); - -CREATE TABLE IF NOT EXISTS topics( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - topic TEXT NOT NULL -); - -CREATE INDEX topics_event_id ON topics(event_id); -CREATE INDEX topics_room_id ON topics(room_id); - -CREATE TABLE IF NOT EXISTS room_names( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - name TEXT NOT NULL -); - -CREATE INDEX room_names_event_id ON room_names(event_id); -CREATE INDEX room_names_room_id ON room_names(room_id); - -CREATE TABLE IF NOT EXISTS rooms( - room_id TEXT PRIMARY KEY NOT NULL, - is_public BOOL, - creator TEXT -); - -CREATE TABLE IF NOT EXISTS room_hosts( - room_id TEXT NOT NULL, - host TEXT NOT NULL, - UNIQUE (room_id, host) -); - -CREATE INDEX room_hosts_room_id ON room_hosts (room_id); diff --git a/synapse/storage/schema/full_schemas/11/keys.sql b/synapse/storage/schema/full_schemas/11/keys.sql deleted file mode 100644 index ca0ca1b694..0000000000 --- a/synapse/storage/schema/full_schemas/11/keys.sql +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -CREATE TABLE IF NOT EXISTS server_tls_certificates( - server_name TEXT, -- Server name. - fingerprint TEXT, -- Certificate fingerprint. - from_server TEXT, -- Which key server the certificate was fetched from. - ts_added_ms BIGINT, -- When the certifcate was added. - tls_certificate bytea, -- DER encoded x509 certificate. - UNIQUE (server_name, fingerprint) -); - -CREATE TABLE IF NOT EXISTS server_signature_keys( - server_name TEXT, -- Server name. - key_id TEXT, -- Key version. - from_server TEXT, -- Which key server the key was fetched form. - ts_added_ms BIGINT, -- When the key was added. - verify_key bytea, -- NACL verification key. - UNIQUE (server_name, key_id) -); diff --git a/synapse/storage/schema/full_schemas/11/media_repository.sql b/synapse/storage/schema/full_schemas/11/media_repository.sql deleted file mode 100644 index 9c264d6ece..0000000000 --- a/synapse/storage/schema/full_schemas/11/media_repository.sql +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE TABLE IF NOT EXISTS local_media_repository ( - media_id TEXT, -- The id used to refer to the media. - media_type TEXT, -- The MIME-type of the media. - media_length INTEGER, -- Length of the media in bytes. - created_ts BIGINT, -- When the content was uploaded in ms. - upload_name TEXT, -- The name the media was uploaded with. - user_id TEXT, -- The user who uploaded the file. - UNIQUE (media_id) -); - -CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails ( - media_id TEXT, -- The id used to refer to the media. - thumbnail_width INTEGER, -- The width of the thumbnail in pixels. - thumbnail_height INTEGER, -- The height of the thumbnail in pixels. - thumbnail_type TEXT, -- The MIME-type of the thumbnail. - thumbnail_method TEXT, -- The method used to make the thumbnail. - thumbnail_length INTEGER, -- The length of the thumbnail in bytes. - UNIQUE ( - media_id, thumbnail_width, thumbnail_height, thumbnail_type - ) -); - -CREATE INDEX local_media_repository_thumbnails_media_id - ON local_media_repository_thumbnails (media_id); - -CREATE TABLE IF NOT EXISTS remote_media_cache ( - media_origin TEXT, -- The remote HS the media came from. - media_id TEXT, -- The id used to refer to the media on that server. - media_type TEXT, -- The MIME-type of the media. - created_ts BIGINT, -- When the content was uploaded in ms. - upload_name TEXT, -- The name the media was uploaded with. - media_length INTEGER, -- Length of the media in bytes. - filesystem_id TEXT, -- The name used to store the media on disk. - UNIQUE (media_origin, media_id) -); - -CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails ( - media_origin TEXT, -- The remote HS the media came from. - media_id TEXT, -- The id used to refer to the media. - thumbnail_width INTEGER, -- The width of the thumbnail in pixels. - thumbnail_height INTEGER, -- The height of the thumbnail in pixels. - thumbnail_method TEXT, -- The method used to make the thumbnail - thumbnail_type TEXT, -- The MIME-type of the thumbnail. - thumbnail_length INTEGER, -- The length of the thumbnail in bytes. - filesystem_id TEXT, -- The name used to store the media on disk. - UNIQUE ( - media_origin, media_id, thumbnail_width, thumbnail_height, - thumbnail_type - ) -); diff --git a/synapse/storage/schema/full_schemas/11/presence.sql b/synapse/storage/schema/full_schemas/11/presence.sql deleted file mode 100644 index 492725994c..0000000000 --- a/synapse/storage/schema/full_schemas/11/presence.sql +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -CREATE TABLE IF NOT EXISTS presence( - user_id TEXT NOT NULL, - state VARCHAR(20), - status_msg TEXT, - mtime BIGINT -- miliseconds since last state change -); - --- For each of /my/ users which possibly-remote users are allowed to see their --- presence state -CREATE TABLE IF NOT EXISTS presence_allow_inbound( - observed_user_id TEXT NOT NULL, - observer_user_id TEXT NOT NULL -- a UserID, -); - --- For each of /my/ users (watcher), which possibly-remote users are they --- watching? -CREATE TABLE IF NOT EXISTS presence_list( - user_id TEXT NOT NULL, - observed_user_id TEXT NOT NULL, -- a UserID, - accepted BOOLEAN NOT NULL -); diff --git a/synapse/storage/schema/full_schemas/11/profiles.sql b/synapse/storage/schema/full_schemas/11/profiles.sql deleted file mode 100644 index b314e6df75..0000000000 --- a/synapse/storage/schema/full_schemas/11/profiles.sql +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -CREATE TABLE IF NOT EXISTS profiles( - user_id TEXT NOT NULL, - displayname TEXT, - avatar_url TEXT -); diff --git a/synapse/storage/schema/full_schemas/11/redactions.sql b/synapse/storage/schema/full_schemas/11/redactions.sql deleted file mode 100644 index 318f0d9aa5..0000000000 --- a/synapse/storage/schema/full_schemas/11/redactions.sql +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -CREATE TABLE IF NOT EXISTS redactions ( - event_id TEXT NOT NULL, - redacts TEXT NOT NULL, - UNIQUE (event_id) -); - -CREATE INDEX redactions_event_id ON redactions (event_id); -CREATE INDEX redactions_redacts ON redactions (redacts); diff --git a/synapse/storage/schema/full_schemas/11/room_aliases.sql b/synapse/storage/schema/full_schemas/11/room_aliases.sql deleted file mode 100644 index 71a91f8ec9..0000000000 --- a/synapse/storage/schema/full_schemas/11/room_aliases.sql +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE TABLE IF NOT EXISTS room_aliases( - room_alias TEXT NOT NULL, - room_id TEXT NOT NULL -); - -CREATE TABLE IF NOT EXISTS room_alias_servers( - room_alias TEXT NOT NULL, - server TEXT NOT NULL -); diff --git a/synapse/storage/schema/full_schemas/11/state.sql b/synapse/storage/schema/full_schemas/11/state.sql deleted file mode 100644 index b901e0f017..0000000000 --- a/synapse/storage/schema/full_schemas/11/state.sql +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE TABLE IF NOT EXISTS state_groups( - id INTEGER PRIMARY KEY, - room_id TEXT NOT NULL, - event_id TEXT NOT NULL -); - -CREATE TABLE IF NOT EXISTS state_groups_state( - state_group INTEGER NOT NULL, - room_id TEXT NOT NULL, - type TEXT NOT NULL, - state_key TEXT NOT NULL, - event_id TEXT NOT NULL -); - -CREATE TABLE IF NOT EXISTS event_to_state_groups( - event_id TEXT NOT NULL, - state_group INTEGER NOT NULL, - UNIQUE (event_id) -); - -CREATE INDEX state_groups_id ON state_groups(id); - -CREATE INDEX state_groups_state_id ON state_groups_state(state_group); -CREATE INDEX state_groups_state_tuple ON state_groups_state(room_id, type, state_key); -CREATE INDEX event_to_state_groups_id ON event_to_state_groups(event_id); diff --git a/synapse/storage/schema/full_schemas/11/transactions.sql b/synapse/storage/schema/full_schemas/11/transactions.sql deleted file mode 100644 index f6a058832e..0000000000 --- a/synapse/storage/schema/full_schemas/11/transactions.sql +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ --- Stores what transaction ids we have received and what our response was -CREATE TABLE IF NOT EXISTS received_transactions( - transaction_id TEXT, - origin TEXT, - ts BIGINT, - response_code INTEGER, - response_json bytea, - has_been_referenced SMALLINT DEFAULT 0, -- Whether thishas been referenced by a prev_tx - UNIQUE (transaction_id, origin) -); - -CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0; - --- For sent transactions only. -CREATE TABLE IF NOT EXISTS transaction_id_to_pdu( - transaction_id INTEGER, - destination TEXT, - pdu_id TEXT, - pdu_origin TEXT -); - -CREATE INDEX transaction_id_to_pdu_tx ON transaction_id_to_pdu(transaction_id, destination); -CREATE INDEX transaction_id_to_pdu_dest ON transaction_id_to_pdu(destination); - --- To track destination health -CREATE TABLE IF NOT EXISTS destinations( - destination TEXT PRIMARY KEY, - retry_last_ts BIGINT, - retry_interval INTEGER -); diff --git a/synapse/storage/schema/full_schemas/11/users.sql b/synapse/storage/schema/full_schemas/11/users.sql deleted file mode 100644 index 6c1d4c34a1..0000000000 --- a/synapse/storage/schema/full_schemas/11/users.sql +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2014-2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -CREATE TABLE IF NOT EXISTS users( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT, - password_hash TEXT, - creation_ts BIGINT, - admin SMALLINT DEFAULT 0 NOT NULL, - UNIQUE(name) -); - -CREATE TABLE IF NOT EXISTS access_tokens( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id TEXT NOT NULL, - device_id TEXT, - token TEXT NOT NULL, - last_used BIGINT, - UNIQUE(token) -); - -CREATE TABLE IF NOT EXISTS user_ips ( - user TEXT NOT NULL, - access_token TEXT NOT NULL, - device_id TEXT, - ip TEXT NOT NULL, - user_agent TEXT NOT NULL, - last_seen BIGINT NOT NULL, - UNIQUE (user, access_token, ip, user_agent) -); - -CREATE INDEX user_ips_user ON user_ips(user); diff --git a/synapse/storage/schema/full_schemas/16/application_services.sql b/synapse/storage/schema/full_schemas/16/application_services.sql index aee0e68473..883fcd10b2 100644 --- a/synapse/storage/schema/full_schemas/16/application_services.sql +++ b/synapse/storage/schema/full_schemas/16/application_services.sql @@ -13,22 +13,11 @@ * limitations under the License. */ -CREATE TABLE IF NOT EXISTS application_services( - id BIGINT PRIMARY KEY, - url TEXT, - token TEXT, - hs_token TEXT, - sender TEXT, - UNIQUE(token) -); +/* We used to create tables called application_services and + * application_services_regex, but these are no longer used and are removed in + * delta 54. + */ -CREATE TABLE IF NOT EXISTS application_services_regex( - id BIGINT PRIMARY KEY, - as_id BIGINT NOT NULL, - namespace INTEGER, /* enum[room_id|room_alias|user_id] */ - regex TEXT, - FOREIGN KEY(as_id) REFERENCES application_services(id) -); CREATE TABLE IF NOT EXISTS application_services_state( as_id TEXT PRIMARY KEY, diff --git a/synapse/storage/schema/full_schemas/16/event_edges.sql b/synapse/storage/schema/full_schemas/16/event_edges.sql index 6b5a5a88fa..10ce2aa7a0 100644 --- a/synapse/storage/schema/full_schemas/16/event_edges.sql +++ b/synapse/storage/schema/full_schemas/16/event_edges.sql @@ -13,6 +13,11 @@ * limitations under the License. */ +/* We used to create tables called event_destinations and + * state_forward_extremities, but these are no longer used and are removed in + * delta 54. + */ + CREATE TABLE IF NOT EXISTS event_forward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, @@ -54,31 +59,6 @@ CREATE TABLE IF NOT EXISTS room_depth( CREATE INDEX room_depth_room ON room_depth(room_id); - -create TABLE IF NOT EXISTS event_destinations( - event_id TEXT NOT NULL, - destination TEXT NOT NULL, - delivered_ts BIGINT DEFAULT 0, -- or 0 if not delivered - UNIQUE (event_id, destination) -); - -CREATE INDEX event_destinations_id ON event_destinations(event_id); - - -CREATE TABLE IF NOT EXISTS state_forward_extremities( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - type TEXT NOT NULL, - state_key TEXT NOT NULL, - UNIQUE (event_id, room_id) -); - -CREATE INDEX st_extrem_keys ON state_forward_extremities( - room_id, type, state_key -); -CREATE INDEX st_extrem_id ON state_forward_extremities(event_id); - - CREATE TABLE IF NOT EXISTS event_auth( event_id TEXT NOT NULL, auth_id TEXT NOT NULL, diff --git a/synapse/storage/schema/full_schemas/16/event_signatures.sql b/synapse/storage/schema/full_schemas/16/event_signatures.sql index 00ce85980e..95826da431 100644 --- a/synapse/storage/schema/full_schemas/16/event_signatures.sql +++ b/synapse/storage/schema/full_schemas/16/event_signatures.sql @@ -13,15 +13,9 @@ * limitations under the License. */ -CREATE TABLE IF NOT EXISTS event_content_hashes ( - event_id TEXT, - algorithm TEXT, - hash bytea, - UNIQUE (event_id, algorithm) -); - -CREATE INDEX event_content_hashes_id ON event_content_hashes(event_id); - + /* We used to create tables called event_content_hashes and event_edge_hashes, + * but these are no longer used and are removed in delta 54. + */ CREATE TABLE IF NOT EXISTS event_reference_hashes ( event_id TEXT, @@ -42,14 +36,3 @@ CREATE TABLE IF NOT EXISTS event_signatures ( ); CREATE INDEX event_signatures_id ON event_signatures(event_id); - - -CREATE TABLE IF NOT EXISTS event_edge_hashes( - event_id TEXT, - prev_event_id TEXT, - algorithm TEXT, - hash bytea, - UNIQUE (event_id, prev_event_id, algorithm) -); - -CREATE INDEX event_edge_hashes_id ON event_edge_hashes(event_id); diff --git a/synapse/storage/schema/full_schemas/16/im.sql b/synapse/storage/schema/full_schemas/16/im.sql index 5f5cb8d01d..a1a2aa8e5b 100644 --- a/synapse/storage/schema/full_schemas/16/im.sql +++ b/synapse/storage/schema/full_schemas/16/im.sql @@ -13,6 +13,10 @@ * limitations under the License. */ +/* We used to create tables called room_hosts and feedback, + * but these are no longer used and are removed in delta 54. + */ + CREATE TABLE IF NOT EXISTS events( stream_ordering INTEGER PRIMARY KEY, topological_ordering BIGINT NOT NULL, @@ -91,15 +95,6 @@ CREATE TABLE IF NOT EXISTS room_memberships( CREATE INDEX room_memberships_room_id ON room_memberships (room_id); CREATE INDEX room_memberships_user_id ON room_memberships (user_id); -CREATE TABLE IF NOT EXISTS feedback( - event_id TEXT NOT NULL, - feedback_type TEXT, - target_event_id TEXT, - sender TEXT, - room_id TEXT, - UNIQUE (event_id) -); - CREATE TABLE IF NOT EXISTS topics( event_id TEXT NOT NULL, room_id TEXT NOT NULL, @@ -123,11 +118,3 @@ CREATE TABLE IF NOT EXISTS rooms( is_public BOOL, creator TEXT ); - -CREATE TABLE IF NOT EXISTS room_hosts( - room_id TEXT NOT NULL, - host TEXT NOT NULL, - UNIQUE (room_id, host) -); - -CREATE INDEX room_hosts_room_id ON room_hosts (room_id); diff --git a/synapse/storage/schema/full_schemas/16/presence.sql b/synapse/storage/schema/full_schemas/16/presence.sql index 0892c4cf96..01d2d8f833 100644 --- a/synapse/storage/schema/full_schemas/16/presence.sql +++ b/synapse/storage/schema/full_schemas/16/presence.sql @@ -28,5 +28,5 @@ CREATE TABLE IF NOT EXISTS presence_allow_inbound( UNIQUE (observed_user_id, observer_user_id) ); --- We used to create a table called presence_list, but this is no longer used +-- We used to create a table called presence_list, but this is no longer used -- and is removed in delta 54. \ No newline at end of file -- cgit 1.5.1 From 3352baac4b03f3414e0a006b9413b65454d1fe91 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 8 Apr 2019 21:50:18 +0100 Subject: Remove unused server_tls_certificates functions (#5028) These have been unused since #4120, and with the demise of perspectives, it is unlikely that they will ever be used again. --- changelog.d/4992.misc | 2 +- changelog.d/5028.misc | 1 + synapse/replication/slave/storage/keys.py | 3 -- synapse/storage/keys.py | 49 +--------------------- .../storage/schema/delta/54/drop_legacy_tables.sql | 4 +- synapse/storage/schema/full_schemas/16/keys.sql | 11 ++--- 6 files changed, 7 insertions(+), 63 deletions(-) create mode 100644 changelog.d/5028.misc (limited to 'synapse/storage') diff --git a/changelog.d/4992.misc b/changelog.d/4992.misc index 8a9eaea4cf..3ee4228c09 100644 --- a/changelog.d/4992.misc +++ b/changelog.d/4992.misc @@ -1 +1 @@ -Remove legacy tables detailed in #1830. +Remove a number of unused tables from the database schema. diff --git a/changelog.d/5028.misc b/changelog.d/5028.misc new file mode 100644 index 0000000000..3ee4228c09 --- /dev/null +++ b/changelog.d/5028.misc @@ -0,0 +1 @@ +Remove a number of unused tables from the database schema. diff --git a/synapse/replication/slave/storage/keys.py b/synapse/replication/slave/storage/keys.py index 8032f53fec..de00660c0e 100644 --- a/synapse/replication/slave/storage/keys.py +++ b/synapse/replication/slave/storage/keys.py @@ -27,8 +27,5 @@ class SlavedKeyStore(BaseSlavedStore): get_server_verify_keys = __func__(DataStore.get_server_verify_keys) store_server_verify_key = __func__(DataStore.store_server_verify_key) - get_server_certificate = __func__(DataStore.get_server_certificate) - store_server_certificate = __func__(DataStore.store_server_certificate) - get_server_keys_json = __func__(DataStore.get_server_keys_json) store_server_keys_json = __func__(DataStore.store_server_keys_json) diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index f24ab3eedd..47a9aa784b 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -13,14 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import hashlib import logging import six from signedjson.key import decode_verify_key_bytes -import OpenSSL from twisted.internet import defer from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -38,53 +36,8 @@ else: class KeyStore(SQLBaseStore): - """Persistence for signature verification keys and tls X.509 certificates + """Persistence for signature verification keys """ - - @defer.inlineCallbacks - def get_server_certificate(self, server_name): - """Retrieve the TLS X.509 certificate for the given server - Args: - server_name (bytes): The name of the server. - Returns: - (OpenSSL.crypto.X509): The tls certificate. - """ - tls_certificate_bytes, = yield self._simple_select_one( - table="server_tls_certificates", - keyvalues={"server_name": server_name}, - retcols=("tls_certificate",), - desc="get_server_certificate", - ) - tls_certificate = OpenSSL.crypto.load_certificate( - OpenSSL.crypto.FILETYPE_ASN1, tls_certificate_bytes - ) - defer.returnValue(tls_certificate) - - def store_server_certificate( - self, server_name, from_server, time_now_ms, tls_certificate - ): - """Stores the TLS X.509 certificate for the given server - Args: - server_name (str): The name of the server. - from_server (str): Where the certificate was looked up - time_now_ms (int): The time now in milliseconds - tls_certificate (OpenSSL.crypto.X509): The X.509 certificate. - """ - tls_certificate_bytes = OpenSSL.crypto.dump_certificate( - OpenSSL.crypto.FILETYPE_ASN1, tls_certificate - ) - fingerprint = hashlib.sha256(tls_certificate_bytes).hexdigest() - return self._simple_upsert( - table="server_tls_certificates", - keyvalues={"server_name": server_name, "fingerprint": fingerprint}, - values={ - "from_server": from_server, - "ts_added_ms": time_now_ms, - "tls_certificate": db_binary_type(tls_certificate_bytes), - }, - desc="store_server_certificate", - ) - @cachedInlineCallbacks() def _get_server_verify_key(self, server_name, key_id): verify_key_bytes = yield self._simple_select_one_onecol( diff --git a/synapse/storage/schema/delta/54/drop_legacy_tables.sql b/synapse/storage/schema/delta/54/drop_legacy_tables.sql index 77b39dc2d2..ecca005d9b 100644 --- a/synapse/storage/schema/delta/54/drop_legacy_tables.sql +++ b/synapse/storage/schema/delta/54/drop_legacy_tables.sql @@ -24,7 +24,5 @@ DROP TABLE IF EXISTS event_edge_hashes; DROP TABLE IF EXISTS event_signatures; DROP TABLE IF EXISTS feedback; DROP TABLE IF EXISTS room_hosts; +DROP TABLE IF EXISTS server_tls_certificates; DROP TABLE IF EXISTS state_forward_extremities; - - - diff --git a/synapse/storage/schema/full_schemas/16/keys.sql b/synapse/storage/schema/full_schemas/16/keys.sql index ca0ca1b694..11cdffdbb3 100644 --- a/synapse/storage/schema/full_schemas/16/keys.sql +++ b/synapse/storage/schema/full_schemas/16/keys.sql @@ -12,14 +12,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -CREATE TABLE IF NOT EXISTS server_tls_certificates( - server_name TEXT, -- Server name. - fingerprint TEXT, -- Certificate fingerprint. - from_server TEXT, -- Which key server the certificate was fetched from. - ts_added_ms BIGINT, -- When the certifcate was added. - tls_certificate bytea, -- DER encoded x509 certificate. - UNIQUE (server_name, fingerprint) -); + +-- we used to create a table called server_tls_certificates, but this is no +-- longer used, and is removed in delta 54. CREATE TABLE IF NOT EXISTS server_signature_keys( server_name TEXT, -- Server name. -- cgit 1.5.1 From 18b69be00f9fa79cf2b237992ef1f0094d1dc453 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 8 Apr 2019 14:51:07 +0100 Subject: Rewrite Datastore.get_server_verify_keys Rewrite this so that it doesn't hammer the database. --- synapse/crypto/keyring.py | 38 +++++++++++------------- synapse/storage/keys.py | 74 ++++++++++++++++++++++++++++------------------ tests/storage/test_keys.py | 53 +++++++++++++++++++++++++++++++-- 3 files changed, 113 insertions(+), 52 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index ede120b2a6..834b107705 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -301,13 +301,12 @@ class Keyring(object): # complete this VerifyKeyRequest. result_keys = results.get(server_name, {}) for key_id in verify_request.key_ids: - if key_id in result_keys: + key = result_keys.get(key_id) + if key: with PreserveLoggingContext(): - verify_request.deferred.callback(( - server_name, - key_id, - result_keys[key_id], - )) + verify_request.deferred.callback( + (server_name, key_id, key) + ) break else: # The else block is only reached if the loop above @@ -341,27 +340,24 @@ class Keyring(object): @defer.inlineCallbacks def get_keys_from_store(self, server_name_and_key_ids): """ - Args: - server_name_and_key_ids (list[(str, iterable[str])]): + server_name_and_key_ids (iterable(Tuple[str, iterable[str]]): list of (server_name, iterable[key_id]) tuples to fetch keys for Returns: - Deferred: resolves to dict[str, dict[str, VerifyKey]]: map from + Deferred: resolves to dict[str, dict[str, VerifyKey|None]]: map from server_name -> key_id -> VerifyKey """ - res = yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - run_in_background( - self.store.get_server_verify_keys, - server_name, key_ids, - ).addCallback(lambda ks, server: (server, ks), server_name) - for server_name, key_ids in server_name_and_key_ids - ], - consumeErrors=True, - ).addErrback(unwrapFirstError)) - - defer.returnValue(dict(res)) + keys_to_fetch = ( + (server_name, key_id) + for server_name, key_ids in server_name_and_key_ids + for key_id in key_ids + ) + res = yield self.store.get_server_verify_keys(keys_to_fetch) + keys = {} + for (server_name, key_id), key in res.items(): + keys.setdefault(server_name, {})[key_id] = key + defer.returnValue(keys) @defer.inlineCallbacks def get_keys_from_perspectives(self, server_name_and_key_ids): diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 47a9aa784b..7036541792 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,15 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging import six from signedjson.key import decode_verify_key_bytes -from twisted.internet import defer - -from synapse.util.caches.descriptors import cachedInlineCallbacks +from synapse.util import batch_iter +from synapse.util.caches.descriptors import cached, cachedList from ._base import SQLBaseStore @@ -38,36 +39,50 @@ else: class KeyStore(SQLBaseStore): """Persistence for signature verification keys """ - @cachedInlineCallbacks() - def _get_server_verify_key(self, server_name, key_id): - verify_key_bytes = yield self._simple_select_one_onecol( - table="server_signature_keys", - keyvalues={"server_name": server_name, "key_id": key_id}, - retcol="verify_key", - desc="_get_server_verify_key", - allow_none=True, - ) - if verify_key_bytes: - defer.returnValue(decode_verify_key_bytes(key_id, bytes(verify_key_bytes))) + @cached() + def _get_server_verify_key(self, server_name_and_key_id): + raise NotImplementedError() - @defer.inlineCallbacks - def get_server_verify_keys(self, server_name, key_ids): - """Retrieve the NACL verification key for a given server for the given - key_ids + @cachedList( + cached_method_name="_get_server_verify_key", list_name="server_name_and_key_ids" + ) + def get_server_verify_keys(self, server_name_and_key_ids): + """ Args: - server_name (str): The name of the server. - key_ids (iterable[str]): key_ids to try and look up. + server_name_and_key_ids (iterable[Tuple[str, str]]): + iterable of (server_name, key-id) tuples to fetch keys for + Returns: - Deferred: resolves to dict[str, VerifyKey]: map from - key_id to verification key. + Deferred: resolves to dict[Tuple[str, str], VerifyKey|None]: + map from (server_name, key_id) -> VerifyKey, or None if the key is + unknown """ keys = {} - for key_id in key_ids: - key = yield self._get_server_verify_key(server_name, key_id) - if key: - keys[key_id] = key - defer.returnValue(keys) + + def _get_keys(txn, batch): + """Processes a batch of keys to fetch, and adds the result to `keys`.""" + + # batch_iter always returns tuples so it's safe to do len(batch) + sql = ( + "SELECT server_name, key_id, verify_key FROM server_signature_keys " + "WHERE 1=0" + ) + " OR (server_name=? AND key_id=?)" * len(batch) + + txn.execute(sql, tuple(itertools.chain.from_iterable(batch))) + + for row in txn: + server_name, key_id, key_bytes = row + keys[(server_name, key_id)] = decode_verify_key_bytes( + key_id, bytes(key_bytes) + ) + + def _txn(txn): + for batch in batch_iter(server_name_and_key_ids, 50): + _get_keys(txn, batch) + return keys + + return self.runInteraction("get_server_verify_keys", _txn) def store_server_verify_key( self, server_name, from_server, time_now_ms, verify_key @@ -93,8 +108,11 @@ class KeyStore(SQLBaseStore): "verify_key": db_binary_type(verify_key.encode()), }, ) + # invalidate takes a tuple corresponding to the params of + # _get_server_verify_key. _get_server_verify_key only takes one + # param, which is itself the 2-tuple (server_name, key_id). txn.call_after( - self._get_server_verify_key.invalidate, (server_name, key_id) + self._get_server_verify_key.invalidate, ((server_name, key_id),) ) return self.runInteraction("store_server_verify_key", _txn) diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py index 7170ae76c7..6bfaa00fe9 100644 --- a/tests/storage/test_keys.py +++ b/tests/storage/test_keys.py @@ -15,6 +15,8 @@ import signedjson.key +from twisted.internet.defer import Deferred + import tests.unittest KEY_1 = signedjson.key.decode_verify_key_base64( @@ -35,10 +37,55 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): self.get_success(d) d = store.get_server_verify_keys( - "server1", ["ed25519:key1", "ed25519:key2", "ed25519:key3"] + [ + ("server1", "ed25519:key1"), + ("server1", "ed25519:key2"), + ("server1", "ed25519:key3"), + ] ) res = self.get_success(d) + self.assertEqual(len(res.keys()), 3) + self.assertEqual(res[("server1", "ed25519:key1")].version, "key1") + self.assertEqual(res[("server1", "ed25519:key2")].version, "key2") + + # non-existent result gives None + self.assertIsNone(res[("server1", "ed25519:key3")]) + + def test_cache(self): + """Check that updates correctly invalidate the cache.""" + + store = self.hs.get_datastore() + + key_id_1 = "ed25519:key1" + key_id_2 = "ed25519:key2" + + d = store.store_server_verify_key("srv1", "from_server", 0, KEY_1) + self.get_success(d) + d = store.store_server_verify_key("srv1", "from_server", 0, KEY_2) + self.get_success(d) + + d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)]) + res = self.get_success(d) + self.assertEqual(len(res.keys()), 2) + self.assertEqual(res[("srv1", key_id_1)], KEY_1) + self.assertEqual(res[("srv1", key_id_2)], KEY_2) + + # we should be able to look up the same thing again without a db hit + res = store.get_server_verify_keys([("srv1", key_id_1)]) + if isinstance(res, Deferred): + res = self.successResultOf(res) + self.assertEqual(len(res.keys()), 1) + self.assertEqual(res[("srv1", key_id_1)], KEY_1) + + new_key_2 = signedjson.key.get_verify_key( + signedjson.key.generate_signing_key("key2") + ) + d = store.store_server_verify_key("srv1", "from_server", 10, new_key_2) + self.get_success(d) + + d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)]) + res = self.get_success(d) self.assertEqual(len(res.keys()), 2) - self.assertEqual(res["ed25519:key1"].version, "key1") - self.assertEqual(res["ed25519:key2"].version, "key2") + self.assertEqual(res[("srv1", key_id_1)], KEY_1) + self.assertEqual(res[("srv1", key_id_2)], new_key_2) -- cgit 1.5.1 From 50d2a3059db29e7c9c7ccc9b005cec8497827e4b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Apr 2019 14:36:40 +0100 Subject: Fix schema upgrade when dropping tables We need to drop tables in the correct order due to foreign table constraints (on `application_services`), otherwise the DROP TABLE command will fail. Introduced in #4992. --- synapse/storage/schema/delta/54/drop_legacy_tables.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/54/drop_legacy_tables.sql b/synapse/storage/schema/delta/54/drop_legacy_tables.sql index ecca005d9b..dbbe682697 100644 --- a/synapse/storage/schema/delta/54/drop_legacy_tables.sql +++ b/synapse/storage/schema/delta/54/drop_legacy_tables.sql @@ -13,8 +13,10 @@ * limitations under the License. */ -DROP TABLE IF EXISTS application_services; +-- we need to do this first due to foreign constraints DROP TABLE IF EXISTS application_services_regex; + +DROP TABLE IF EXISTS application_services; DROP TABLE IF EXISTS transaction_id_to_pdu; DROP TABLE IF EXISTS stats_reporting; DROP TABLE IF EXISTS current_state_resets; -- cgit 1.5.1 From 747aa9f8cad92ffcda51b2aa07987c87f4353649 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Apr 2019 17:10:55 +0100 Subject: Add account expiration feature --- changelog.d/5027.feature | 1 + docs/sample_config.yaml | 6 +++ synapse/api/auth.py | 12 +++++ synapse/api/errors.py | 1 + synapse/config/registration.py | 17 ++++++++ synapse/storage/prepare_database.py | 2 +- synapse/storage/registration.py | 34 +++++++++++++++ .../storage/schema/delta/54/account_validity.sql | 20 +++++++++ tests/rest/client/v2_alpha/test_register.py | 51 +++++++++++++++++++++- tests/test_state.py | 4 +- 10 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 changelog.d/5027.feature create mode 100644 synapse/storage/schema/delta/54/account_validity.sql (limited to 'synapse/storage') diff --git a/changelog.d/5027.feature b/changelog.d/5027.feature new file mode 100644 index 0000000000..12766a82a7 --- /dev/null +++ b/changelog.d/5027.feature @@ -0,0 +1 @@ +Add time-based account expiration. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 4ada0fba0e..5594c8b9af 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -643,6 +643,12 @@ uploads_path: "DATADIR/uploads" # #enable_registration: false +# Optional account validity parameter. This allows for, e.g., accounts to +# be denied any request after a given period. +# +#account_validity: +# period: 6w + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: diff --git a/synapse/api/auth.py b/synapse/api/auth.py index e8112d5f05..976e0dd18b 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -64,6 +64,8 @@ class Auth(object): self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000) register_cache("cache", "token_cache", self.token_cache) + self._account_validity = hs.config.account_validity + @defer.inlineCallbacks def check_from_context(self, room_version, event, context, do_sig_check=True): prev_state_ids = yield context.get_prev_state_ids(self.store) @@ -226,6 +228,16 @@ class Auth(object): token_id = user_info["token_id"] is_guest = user_info["is_guest"] + # Deny the request if the user account has expired. + if self._account_validity.enabled: + expiration_ts = yield self.store.get_expiration_ts_for_user(user) + if self.clock.time_msec() >= expiration_ts: + raise AuthError( + 403, + "User account has expired", + errcode=Codes.EXPIRED_ACCOUNT, + ) + # device_id may not be present if get_user_by_access_token has been # stubbed out. device_id = user_info.get("device_id") diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 0b464834ce..4c33450e7f 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -60,6 +60,7 @@ class Codes(object): UNSUPPORTED_ROOM_VERSION = "M_UNSUPPORTED_ROOM_VERSION" INCOMPATIBLE_ROOM_VERSION = "M_INCOMPATIBLE_ROOM_VERSION" WRONG_ROOM_KEYS_VERSION = "M_WRONG_ROOM_KEYS_VERSION" + EXPIRED_ACCOUNT = "ORG_MATRIX_EXPIRED_ACCOUNT" class CodeMessageException(RuntimeError): diff --git a/synapse/config/registration.py b/synapse/config/registration.py index f6b2b9ceee..b7a7b4f1cf 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -20,6 +20,15 @@ from synapse.types import RoomAlias from synapse.util.stringutils import random_string_with_symbols +class AccountValidityConfig(Config): + def __init__(self, config): + self.enabled = (len(config) > 0) + + period = config.get("period", None) + if period: + self.period = self.parse_duration(period) + + class RegistrationConfig(Config): def read_config(self, config): @@ -31,6 +40,8 @@ class RegistrationConfig(Config): strtobool(str(config["disable_registration"])) ) + self.account_validity = AccountValidityConfig(config.get("account_validity", {})) + self.registrations_require_3pid = config.get("registrations_require_3pid", []) self.allowed_local_3pids = config.get("allowed_local_3pids", []) self.registration_shared_secret = config.get("registration_shared_secret") @@ -75,6 +86,12 @@ class RegistrationConfig(Config): # #enable_registration: false + # Optional account validity parameter. This allows for, e.g., accounts to + # be denied any request after a given period. + # + #account_validity: + # period: 6w + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index fa36daac52..e042221774 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 53 +SCHEMA_VERSION = 54 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9b6c28892c..eede8ae4d2 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -86,6 +86,26 @@ class RegistrationWorkerStore(SQLBaseStore): token ) + @cachedInlineCallbacks() + def get_expiration_ts_for_user(self, user): + """Get the expiration timestamp for the account bearing a given user ID. + + Args: + user (str): The ID of the user. + Returns: + defer.Deferred: None, if the account has no expiration timestamp, + otherwise int representation of the timestamp (as a number of + milliseconds since epoch). + """ + res = yield self._simple_select_one_onecol( + table="account_validity", + keyvalues={"user_id": user.to_string()}, + retcol="expiration_ts_ms", + allow_none=True, + desc="get_expiration_date_for_user", + ) + defer.returnValue(res) + @defer.inlineCallbacks def is_server_admin(self, user): res = yield self._simple_select_one_onecol( @@ -351,6 +371,8 @@ class RegistrationStore(RegistrationWorkerStore, columns=["creation_ts"], ) + self._account_validity = hs.config.account_validity + # we no longer use refresh tokens, but it's possible that some people # might have a background update queued to build this index. Just # clear the background update. @@ -485,6 +507,18 @@ class RegistrationStore(RegistrationWorkerStore, "user_type": user_type, } ) + + if self._account_validity.enabled: + now_ms = self.clock.time_msec() + expiration_ts = now_ms + self._account_validity.period + self._simple_insert_txn( + txn, + "account_validity", + values={ + "user_id": user_id, + "expiration_ts_ms": expiration_ts, + } + ) except self.database_engine.module.IntegrityError: raise StoreError( 400, "User ID already taken.", errcode=Codes.USER_IN_USE diff --git a/synapse/storage/schema/delta/54/account_validity.sql b/synapse/storage/schema/delta/54/account_validity.sql new file mode 100644 index 0000000000..57249262d7 --- /dev/null +++ b/synapse/storage/schema/delta/54/account_validity.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS account_validity ( + user_id TEXT PRIMARY KEY, + expiration_ts_ms BIGINT NOT NULL +); diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index a45e6e5e1f..d3611ed21f 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -1,15 +1,18 @@ +import datetime import json from synapse.api.constants import LoginType +from synapse.api.errors import Codes from synapse.appservice import ApplicationService -from synapse.rest.client.v2_alpha.register import register_servlets +from synapse.rest.client.v1 import admin, login +from synapse.rest.client.v2_alpha import register, sync from tests import unittest class RegisterRestServletTestCase(unittest.HomeserverTestCase): - servlets = [register_servlets] + servlets = [register.register_servlets] def make_homeserver(self, reactor, clock): @@ -181,3 +184,47 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEquals(channel.result["code"], b"200", channel.result) + + +class AccountValidityTestCase(unittest.HomeserverTestCase): + + servlets = [ + register.register_servlets, + admin.register_servlets, + login.register_servlets, + sync.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + config = self.default_config() + config.enable_registration = True + config.account_validity.enabled = True + config.account_validity.period = 604800000 # Time in ms for 1 week + self.hs = self.setup_test_homeserver(config=config) + + return self.hs + + def test_validity_period(self): + self.register_user("kermit", "monkey") + tok = self.login("kermit", "monkey") + + # The specific endpoint doesn't matter, all we need is an authenticated + # endpoint. + request, channel = self.make_request( + b"GET", "/sync", access_token=tok, + ) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(datetime.timedelta(weeks=1).total_seconds()) + + request, channel = self.make_request( + b"GET", "/sync", access_token=tok, + ) + self.render(request) + + self.assertEquals(channel.result["code"], b"403", channel.result) + self.assertEquals( + channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT, channel.result, + ) diff --git a/tests/test_state.py b/tests/test_state.py index e20c33322a..ce2b7eb7ed 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -24,7 +24,7 @@ from synapse.state import StateHandler, StateResolutionHandler from tests import unittest -from .utils import MockClock +from .utils import MockClock, default_config _next_event_id = 1000 @@ -159,6 +159,7 @@ class StateTestCase(unittest.TestCase): self.store = StateGroupStore() hs = Mock( spec_set=[ + "config", "get_datastore", "get_auth", "get_state_handler", @@ -166,6 +167,7 @@ class StateTestCase(unittest.TestCase): "get_state_resolution_handler", ] ) + hs.config = default_config("tesths") hs.get_datastore.return_value = self.store hs.get_state_handler.return_value = None hs.get_clock.return_value = MockClock() -- cgit 1.5.1 From 20f0617e87924c929f0db0c06d30de0c8d15081c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 10 Apr 2019 17:58:47 +0100 Subject: Send out emails with links to extend an account's validity period --- changelog.d/5047.feature | 1 + docs/sample_config.yaml | 29 ++- synapse/api/auth.py | 5 +- synapse/config/emailconfig.py | 7 +- synapse/config/registration.py | 52 ++++- synapse/handlers/account_validity.py | 228 +++++++++++++++++++++ synapse/push/mailer.py | 14 +- synapse/push/pusher.py | 6 +- synapse/res/templates/mail-expiry.css | 4 + synapse/res/templates/notice_expiry.html | 43 ++++ synapse/res/templates/notice_expiry.txt | 7 + synapse/rest/__init__.py | 2 + synapse/rest/client/v2_alpha/account_validity.py | 62 ++++++ synapse/server.py | 5 + synapse/storage/registration.py | 168 +++++++++++++-- .../storage/schema/delta/54/account_validity.sql | 9 +- tests/rest/client/v2_alpha/test_register.py | 100 ++++++++- 17 files changed, 699 insertions(+), 43 deletions(-) create mode 100644 changelog.d/5047.feature create mode 100644 synapse/handlers/account_validity.py create mode 100644 synapse/res/templates/mail-expiry.css create mode 100644 synapse/res/templates/notice_expiry.html create mode 100644 synapse/res/templates/notice_expiry.txt create mode 100644 synapse/rest/client/v2_alpha/account_validity.py (limited to 'synapse/storage') diff --git a/changelog.d/5047.feature b/changelog.d/5047.feature new file mode 100644 index 0000000000..12766a82a7 --- /dev/null +++ b/changelog.d/5047.feature @@ -0,0 +1 @@ +Add time-based account expiration. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 5594c8b9af..8bbd437239 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -643,11 +643,31 @@ uploads_path: "DATADIR/uploads" # #enable_registration: false -# Optional account validity parameter. This allows for, e.g., accounts to -# be denied any request after a given period. +# Optional account validity configuration. This allows for accounts to be denied +# any request after a given period. +# +# ``enabled`` defines whether the account validity feature is enabled. Defaults +# to False. +# +# ``period`` allows setting the period after which an account is valid +# after its registration. When renewing the account, its validity period +# will be extended by this amount of time. This parameter is required when using +# the account validity feature. +# +# ``renew_at`` is the amount of time before an account's expiry date at which +# Synapse will send an email to the account's email address with a renewal link. +# This needs the ``email`` and ``public_baseurl`` configuration sections to be +# filled. +# +# ``renew_email_subject`` is the subject of the email sent out with the renewal +# link. ``%(app)s`` can be used as a placeholder for the ``app_name`` parameter +# from the ``email`` section. # #account_validity: +# enabled: True # period: 6w +# renew_at: 1w +# renew_email_subject: "Renew your %(app)s account" # The user must provide all of the below types of 3PID when registering. # @@ -890,7 +910,7 @@ password_config: -# Enable sending emails for notification events +# Enable sending emails for notification events or expiry notices # Defining a custom URL for Riot is only needed if email notifications # should contain links to a self-hosted installation of Riot; when set # the "app_name" setting is ignored. @@ -912,6 +932,9 @@ password_config: # #template_dir: res/templates # notif_template_html: notif_mail.html # notif_template_text: notif_mail.txt +# # Templates for account expiry notices. +# expiry_template_html: notice_expiry.html +# expiry_template_text: notice_expiry.txt # notif_for_new_users: True # riot_base_url: "http://localhost/riot" diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 976e0dd18b..4482962510 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -230,8 +230,9 @@ class Auth(object): # Deny the request if the user account has expired. if self._account_validity.enabled: - expiration_ts = yield self.store.get_expiration_ts_for_user(user) - if self.clock.time_msec() >= expiration_ts: + user_id = user.to_string() + expiration_ts = yield self.store.get_expiration_ts_for_user(user_id) + if expiration_ts and self.clock.time_msec() >= expiration_ts: raise AuthError( 403, "User account has expired", diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 93d70cff14..60827be72f 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -71,6 +71,8 @@ class EmailConfig(Config): self.email_notif_from = email_config["notif_from"] self.email_notif_template_html = email_config["notif_template_html"] self.email_notif_template_text = email_config["notif_template_text"] + self.email_expiry_template_html = email_config["expiry_template_html"] + self.email_expiry_template_text = email_config["expiry_template_text"] template_dir = email_config.get("template_dir") # we need an absolute path, because we change directory after starting (and @@ -120,7 +122,7 @@ class EmailConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ - # Enable sending emails for notification events + # Enable sending emails for notification events or expiry notices # Defining a custom URL for Riot is only needed if email notifications # should contain links to a self-hosted installation of Riot; when set # the "app_name" setting is ignored. @@ -142,6 +144,9 @@ class EmailConfig(Config): # #template_dir: res/templates # notif_template_html: notif_mail.html # notif_template_text: notif_mail.txt + # # Templates for account expiry notices. + # expiry_template_html: notice_expiry.html + # expiry_template_text: notice_expiry.txt # notif_for_new_users: True # riot_base_url: "http://localhost/riot" """ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index b7a7b4f1cf..129c208204 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -21,12 +21,26 @@ from synapse.util.stringutils import random_string_with_symbols class AccountValidityConfig(Config): - def __init__(self, config): - self.enabled = (len(config) > 0) + def __init__(self, config, synapse_config): + self.enabled = config.get("enabled", False) + self.renew_by_email_enabled = ("renew_at" in config) - period = config.get("period", None) - if period: - self.period = self.parse_duration(period) + if self.enabled: + if "period" in config: + self.period = self.parse_duration(config["period"]) + else: + raise ConfigError("'period' is required when using account validity") + + if "renew_at" in config: + self.renew_at = self.parse_duration(config["renew_at"]) + + if "renew_email_subject" in config: + self.renew_email_subject = config["renew_email_subject"] + else: + self.renew_email_subject = "Renew your %(app)s account" + + if self.renew_by_email_enabled and "public_baseurl" not in synapse_config: + raise ConfigError("Can't send renewal emails without 'public_baseurl'") class RegistrationConfig(Config): @@ -40,7 +54,9 @@ class RegistrationConfig(Config): strtobool(str(config["disable_registration"])) ) - self.account_validity = AccountValidityConfig(config.get("account_validity", {})) + self.account_validity = AccountValidityConfig( + config.get("account_validity", {}), config, + ) self.registrations_require_3pid = config.get("registrations_require_3pid", []) self.allowed_local_3pids = config.get("allowed_local_3pids", []) @@ -86,11 +102,31 @@ class RegistrationConfig(Config): # #enable_registration: false - # Optional account validity parameter. This allows for, e.g., accounts to - # be denied any request after a given period. + # Optional account validity configuration. This allows for accounts to be denied + # any request after a given period. + # + # ``enabled`` defines whether the account validity feature is enabled. Defaults + # to False. + # + # ``period`` allows setting the period after which an account is valid + # after its registration. When renewing the account, its validity period + # will be extended by this amount of time. This parameter is required when using + # the account validity feature. + # + # ``renew_at`` is the amount of time before an account's expiry date at which + # Synapse will send an email to the account's email address with a renewal link. + # This needs the ``email`` and ``public_baseurl`` configuration sections to be + # filled. + # + # ``renew_email_subject`` is the subject of the email sent out with the renewal + # link. ``%%(app)s`` can be used as a placeholder for the ``app_name`` parameter + # from the ``email`` section. # #account_validity: + # enabled: True # period: 6w + # renew_at: 1w + # renew_email_subject: "Renew your %%(app)s account" # The user must provide all of the below types of 3PID when registering. # diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py new file mode 100644 index 0000000000..e82049e42d --- /dev/null +++ b/synapse/handlers/account_validity.py @@ -0,0 +1,228 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import email.mime.multipart +import email.utils +import logging +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +from twisted.internet import defer + +from synapse.api.errors import StoreError +from synapse.types import UserID +from synapse.util import stringutils +from synapse.util.logcontext import make_deferred_yieldable + +try: + from synapse.push.mailer import load_jinja2_templates +except ImportError: + load_jinja2_templates = None + +logger = logging.getLogger(__name__) + + +class AccountValidityHandler(object): + def __init__(self, hs): + self.hs = hs + self.store = self.hs.get_datastore() + self.sendmail = self.hs.get_sendmail() + self.clock = self.hs.get_clock() + + self._account_validity = self.hs.config.account_validity + + if self._account_validity.renew_by_email_enabled and load_jinja2_templates: + # Don't do email-specific configuration if renewal by email is disabled. + try: + app_name = self.hs.config.email_app_name + + self._subject = self._account_validity.renew_email_subject % { + "app": app_name, + } + + self._from_string = self.hs.config.email_notif_from % { + "app": app_name, + } + except Exception: + # If substitution failed, fall back to the bare strings. + self._subject = self._account_validity.renew_email_subject + self._from_string = self.hs.config.email_notif_from + + self._raw_from = email.utils.parseaddr(self._from_string)[1] + + self._template_html, self._template_text = load_jinja2_templates( + config=self.hs.config, + template_html_name=self.hs.config.email_expiry_template_html, + template_text_name=self.hs.config.email_expiry_template_text, + ) + + # Check the renewal emails to send and send them every 30min. + self.clock.looping_call( + self.send_renewal_emails, + 30 * 60 * 1000, + ) + + @defer.inlineCallbacks + def send_renewal_emails(self): + """Gets the list of users whose account is expiring in the amount of time + configured in the ``renew_at`` parameter from the ``account_validity`` + configuration, and sends renewal emails to all of these users as long as they + have an email 3PID attached to their account. + """ + expiring_users = yield self.store.get_users_expiring_soon() + + if expiring_users: + for user in expiring_users: + yield self._send_renewal_email( + user_id=user["user_id"], + expiration_ts=user["expiration_ts_ms"], + ) + + @defer.inlineCallbacks + def _send_renewal_email(self, user_id, expiration_ts): + """Sends out a renewal email to every email address attached to the given user + with a unique link allowing them to renew their account. + + Args: + user_id (str): ID of the user to send email(s) to. + expiration_ts (int): Timestamp in milliseconds for the expiration date of + this user's account (used in the email templates). + """ + addresses = yield self._get_email_addresses_for_user(user_id) + + # Stop right here if the user doesn't have at least one email address. + # In this case, they will have to ask their server admin to renew their + # account manually. + if not addresses: + return + + try: + user_display_name = yield self.store.get_profile_displayname( + UserID.from_string(user_id).localpart + ) + if user_display_name is None: + user_display_name = user_id + except StoreError: + user_display_name = user_id + + renewal_token = yield self._get_renewal_token(user_id) + url = "%s_matrix/client/unstable/account_validity/renew?token=%s" % ( + self.hs.config.public_baseurl, + renewal_token, + ) + + template_vars = { + "display_name": user_display_name, + "expiration_ts": expiration_ts, + "url": url, + } + + html_text = self._template_html.render(**template_vars) + html_part = MIMEText(html_text, "html", "utf8") + + plain_text = self._template_text.render(**template_vars) + text_part = MIMEText(plain_text, "plain", "utf8") + + for address in addresses: + raw_to = email.utils.parseaddr(address)[1] + + multipart_msg = MIMEMultipart('alternative') + multipart_msg['Subject'] = self._subject + multipart_msg['From'] = self._from_string + multipart_msg['To'] = address + multipart_msg['Date'] = email.utils.formatdate() + multipart_msg['Message-ID'] = email.utils.make_msgid() + multipart_msg.attach(text_part) + multipart_msg.attach(html_part) + + logger.info("Sending renewal email to %s", address) + + yield make_deferred_yieldable(self.sendmail( + self.hs.config.email_smtp_host, + self._raw_from, raw_to, multipart_msg.as_string().encode('utf8'), + reactor=self.hs.get_reactor(), + port=self.hs.config.email_smtp_port, + requireAuthentication=self.hs.config.email_smtp_user is not None, + username=self.hs.config.email_smtp_user, + password=self.hs.config.email_smtp_pass, + requireTransportSecurity=self.hs.config.require_transport_security + )) + + yield self.store.set_renewal_mail_status( + user_id=user_id, + email_sent=True, + ) + + @defer.inlineCallbacks + def _get_email_addresses_for_user(self, user_id): + """Retrieve the list of email addresses attached to a user's account. + + Args: + user_id (str): ID of the user to lookup email addresses for. + + Returns: + defer.Deferred[list[str]]: Email addresses for this account. + """ + threepids = yield self.store.user_get_threepids(user_id) + + addresses = [] + for threepid in threepids: + if threepid["medium"] == "email": + addresses.append(threepid["address"]) + + defer.returnValue(addresses) + + @defer.inlineCallbacks + def _get_renewal_token(self, user_id): + """Generates a 32-byte long random string that will be inserted into the + user's renewal email's unique link, then saves it into the database. + + Args: + user_id (str): ID of the user to generate a string for. + + Returns: + defer.Deferred[str]: The generated string. + + Raises: + StoreError(500): Couldn't generate a unique string after 5 attempts. + """ + attempts = 0 + while attempts < 5: + try: + renewal_token = stringutils.random_string(32) + yield self.store.set_renewal_token_for_user(user_id, renewal_token) + defer.returnValue(renewal_token) + except StoreError: + attempts += 1 + raise StoreError(500, "Couldn't generate a unique string as refresh string.") + + @defer.inlineCallbacks + def renew_account(self, renewal_token): + """Renews the account attached to a given renewal token by pushing back the + expiration date by the current validity period in the server's configuration. + + Args: + renewal_token (str): Token sent with the renewal request. + """ + user_id = yield self.store.get_user_from_renewal_token(renewal_token) + + logger.debug("Renewing an account for user %s", user_id) + + new_expiration_date = self.clock.time_msec() + self._account_validity.period + + yield self.store.renew_account_for_user( + user_id=user_id, + new_expiration_ts=new_expiration_date, + ) diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 1eb5be0957..c269bcf4a4 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -521,11 +521,11 @@ def format_ts_filter(value, format): return time.strftime(format, time.localtime(value / 1000)) -def load_jinja2_templates(config): +def load_jinja2_templates(config, template_html_name, template_text_name): """Load the jinja2 email templates from disk Returns: - (notif_template_html, notif_template_text) + (template_html, template_text) """ logger.info("loading email templates from '%s'", config.email_template_dir) loader = jinja2.FileSystemLoader(config.email_template_dir) @@ -533,14 +533,10 @@ def load_jinja2_templates(config): env.filters["format_ts"] = format_ts_filter env.filters["mxc_to_http"] = _create_mxc_to_http_filter(config) - notif_template_html = env.get_template( - config.email_notif_template_html - ) - notif_template_text = env.get_template( - config.email_notif_template_text - ) + template_html = env.get_template(template_html_name) + template_text = env.get_template(template_text_name) - return notif_template_html, notif_template_text + return template_html, template_text def _create_mxc_to_http_filter(config): diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index b33f2a357b..14bc7823cf 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -44,7 +44,11 @@ class PusherFactory(object): if hs.config.email_enable_notifs: self.mailers = {} # app_name -> Mailer - templates = load_jinja2_templates(hs.config) + templates = load_jinja2_templates( + config=hs.config, + template_html_name=hs.config.email_notif_template_html, + template_text_name=hs.config.email_notif_template_text, + ) self.notif_template_html, self.notif_template_text = templates self.pusher_types["email"] = self._create_email_pusher diff --git a/synapse/res/templates/mail-expiry.css b/synapse/res/templates/mail-expiry.css new file mode 100644 index 0000000000..3dea486467 --- /dev/null +++ b/synapse/res/templates/mail-expiry.css @@ -0,0 +1,4 @@ +.noticetext { + margin-top: 10px; + margin-bottom: 10px; +} diff --git a/synapse/res/templates/notice_expiry.html b/synapse/res/templates/notice_expiry.html new file mode 100644 index 0000000000..f0d7c66e1b --- /dev/null +++ b/synapse/res/templates/notice_expiry.html @@ -0,0 +1,43 @@ + + + + + + + + + + + + +
+ + + + + + + + +
+
Hi {{ display_name }},
+
+
Your account will expire on {{ expiration_ts|format_ts("%d-%m-%Y") }}. This means that you will lose access to your account after this date.
+
To extend the validity of your account, please click on the link bellow (or copy and paste it into a new browser tab):
+ +
+
+ + diff --git a/synapse/res/templates/notice_expiry.txt b/synapse/res/templates/notice_expiry.txt new file mode 100644 index 0000000000..41f1c4279c --- /dev/null +++ b/synapse/res/templates/notice_expiry.txt @@ -0,0 +1,7 @@ +Hi {{ display_name }}, + +Your account will expire on {{ expiration_ts|format_ts("%d-%m-%Y") }}. This means that you will lose access to your account after this date. + +To extend the validity of your account, please click on the link bellow (or copy and paste it to a new browser tab): + +{{ url }} diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 91f5247d52..a66885d349 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -33,6 +33,7 @@ from synapse.rest.client.v1 import ( from synapse.rest.client.v2_alpha import ( account, account_data, + account_validity, auth, capabilities, devices, @@ -109,3 +110,4 @@ class ClientRestResource(JsonResource): groups.register_servlets(hs, client_resource) room_upgrade_rest_servlet.register_servlets(hs, client_resource) capabilities.register_servlets(hs, client_resource) + account_validity.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v2_alpha/account_validity.py b/synapse/rest/client/v2_alpha/account_validity.py new file mode 100644 index 0000000000..1ff6a6b638 --- /dev/null +++ b/synapse/rest/client/v2_alpha/account_validity.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.errors import SynapseError +from synapse.http.server import finish_request +from synapse.http.servlet import RestServlet + +from ._base import client_v2_patterns + +logger = logging.getLogger(__name__) + + +class AccountValidityRenewServlet(RestServlet): + PATTERNS = client_v2_patterns("/account_validity/renew$") + SUCCESS_HTML = b"Your account has been successfully renewed." + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(AccountValidityRenewServlet, self).__init__() + + self.hs = hs + self.account_activity_handler = hs.get_account_validity_handler() + + @defer.inlineCallbacks + def on_GET(self, request): + if b"token" not in request.args: + raise SynapseError(400, "Missing renewal token") + renewal_token = request.args[b"token"][0] + + yield self.account_activity_handler.renew_account(renewal_token.decode('utf8')) + + request.setResponseCode(200) + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%d" % ( + len(AccountValidityRenewServlet.SUCCESS_HTML), + )) + request.write(AccountValidityRenewServlet.SUCCESS_HTML) + finish_request(request) + defer.returnValue(None) + + +def register_servlets(hs, http_server): + AccountValidityRenewServlet(hs).register(http_server) diff --git a/synapse/server.py b/synapse/server.py index dc8f1ccb8c..8c30ac2fa5 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -47,6 +47,7 @@ from synapse.federation.transport.client import TransportLayerClient from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer from synapse.groups.groups_server import GroupsServerHandler from synapse.handlers import Handlers +from synapse.handlers.account_validity import AccountValidityHandler from synapse.handlers.acme import AcmeHandler from synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator @@ -183,6 +184,7 @@ class HomeServer(object): 'room_context_handler', 'sendmail', 'registration_handler', + 'account_validity_handler', ] REQUIRED_ON_MASTER_STARTUP = [ @@ -506,6 +508,9 @@ class HomeServer(object): def build_registration_handler(self): return RegistrationHandler(self) + def build_account_validity_handler(self): + return AccountValidityHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index eede8ae4d2..a78850259f 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -32,6 +32,7 @@ class RegistrationWorkerStore(SQLBaseStore): super(RegistrationWorkerStore, self).__init__(db_conn, hs) self.config = hs.config + self.clock = hs.get_clock() @cached() def get_user_by_id(self, user_id): @@ -87,25 +88,156 @@ class RegistrationWorkerStore(SQLBaseStore): ) @cachedInlineCallbacks() - def get_expiration_ts_for_user(self, user): + def get_expiration_ts_for_user(self, user_id): """Get the expiration timestamp for the account bearing a given user ID. Args: - user (str): The ID of the user. + user_id (str): The ID of the user. Returns: defer.Deferred: None, if the account has no expiration timestamp, - otherwise int representation of the timestamp (as a number of - milliseconds since epoch). + otherwise int representation of the timestamp (as a number of + milliseconds since epoch). """ res = yield self._simple_select_one_onecol( table="account_validity", - keyvalues={"user_id": user.to_string()}, + keyvalues={"user_id": user_id}, retcol="expiration_ts_ms", allow_none=True, - desc="get_expiration_date_for_user", + desc="get_expiration_ts_for_user", + ) + defer.returnValue(res) + + @defer.inlineCallbacks + def renew_account_for_user(self, user_id, new_expiration_ts): + """Updates the account validity table with a new timestamp for a given + user, removes the existing renewal token from this user, and unsets the + flag indicating that an email has been sent for renewing this account. + + Args: + user_id (str): ID of the user whose account validity to renew. + new_expiration_ts: New expiration date, as a timestamp in milliseconds + since epoch. + """ + def renew_account_for_user_txn(txn): + self._simple_update_txn( + txn=txn, + table="account_validity", + keyvalues={"user_id": user_id}, + updatevalues={ + "expiration_ts_ms": new_expiration_ts, + "email_sent": False, + "renewal_token": None, + }, + ) + self._invalidate_cache_and_stream( + txn, self.get_expiration_ts_for_user, (user_id,), + ) + + yield self.runInteraction( + "renew_account_for_user", + renew_account_for_user_txn, + ) + + @defer.inlineCallbacks + def set_renewal_token_for_user(self, user_id, renewal_token): + """Defines a renewal token for a given user. + + Args: + user_id (str): ID of the user to set the renewal token for. + renewal_token (str): Random unique string that will be used to renew the + user's account. + + Raises: + StoreError: The provided token is already set for another user. + """ + yield self._simple_update_one( + table="account_validity", + keyvalues={"user_id": user_id}, + updatevalues={"renewal_token": renewal_token}, + desc="set_renewal_token_for_user", + ) + + @defer.inlineCallbacks + def get_user_from_renewal_token(self, renewal_token): + """Get a user ID from a renewal token. + + Args: + renewal_token (str): The renewal token to perform the lookup with. + + Returns: + defer.Deferred[str]: The ID of the user to which the token belongs. + """ + res = yield self._simple_select_one_onecol( + table="account_validity", + keyvalues={"renewal_token": renewal_token}, + retcol="user_id", + desc="get_user_from_renewal_token", + ) + + defer.returnValue(res) + + @defer.inlineCallbacks + def get_renewal_token_for_user(self, user_id): + """Get the renewal token associated with a given user ID. + + Args: + user_id (str): The user ID to lookup a token for. + + Returns: + defer.Deferred[str]: The renewal token associated with this user ID. + """ + res = yield self._simple_select_one_onecol( + table="account_validity", + keyvalues={"user_id": user_id}, + retcol="renewal_token", + desc="get_renewal_token_for_user", ) + defer.returnValue(res) + @defer.inlineCallbacks + def get_users_expiring_soon(self): + """Selects users whose account will expire in the [now, now + renew_at] time + window (see configuration for account_validity for information on what renew_at + refers to). + + Returns: + Deferred: Resolves to a list[dict[user_id (str), expiration_ts_ms (int)]] + """ + def select_users_txn(txn, now_ms, renew_at): + sql = ( + "SELECT user_id, expiration_ts_ms FROM account_validity" + " WHERE email_sent = ? AND (expiration_ts_ms - ?) <= ?" + ) + values = [False, now_ms, renew_at] + txn.execute(sql, values) + return self.cursor_to_dict(txn) + + res = yield self.runInteraction( + "get_users_expiring_soon", + select_users_txn, + self.clock.time_msec(), self.config.account_validity.renew_at, + ) + + defer.returnValue(res) + + @defer.inlineCallbacks + def set_renewal_mail_status(self, user_id, email_sent): + """Sets or unsets the flag that indicates whether a renewal email has been sent + to the user (and the user hasn't renewed their account yet). + + Args: + user_id (str): ID of the user to set/unset the flag for. + email_sent (bool): Flag which indicates whether a renewal email has been sent + to this user. + """ + yield self._simple_update_one( + table="account_validity", + keyvalues={"user_id": user_id}, + updatevalues={"email_sent": email_sent}, + desc="set_renewal_mail_status", + ) + @defer.inlineCallbacks def is_server_admin(self, user): res = yield self._simple_select_one_onecol( @@ -508,22 +640,24 @@ class RegistrationStore(RegistrationWorkerStore, } ) - if self._account_validity.enabled: - now_ms = self.clock.time_msec() - expiration_ts = now_ms + self._account_validity.period - self._simple_insert_txn( - txn, - "account_validity", - values={ - "user_id": user_id, - "expiration_ts_ms": expiration_ts, - } - ) except self.database_engine.module.IntegrityError: raise StoreError( 400, "User ID already taken.", errcode=Codes.USER_IN_USE ) + if self._account_validity.enabled: + now_ms = self.clock.time_msec() + expiration_ts = now_ms + self._account_validity.period + self._simple_insert_txn( + txn, + "account_validity", + values={ + "user_id": user_id, + "expiration_ts_ms": expiration_ts, + "email_sent": False, + } + ) + if token: # it's possible for this to get a conflict, but only for a single user # since tokens are namespaced based on their user ID diff --git a/synapse/storage/schema/delta/54/account_validity.sql b/synapse/storage/schema/delta/54/account_validity.sql index 57249262d7..2357626000 100644 --- a/synapse/storage/schema/delta/54/account_validity.sql +++ b/synapse/storage/schema/delta/54/account_validity.sql @@ -13,8 +13,15 @@ * limitations under the License. */ +DROP TABLE IF EXISTS account_validity; + -- Track what users are in public rooms. CREATE TABLE IF NOT EXISTS account_validity ( user_id TEXT PRIMARY KEY, - expiration_ts_ms BIGINT NOT NULL + expiration_ts_ms BIGINT NOT NULL, + email_sent BOOLEAN NOT NULL, + renewal_token TEXT ); + +CREATE INDEX account_validity_email_sent_idx ON account_validity(email_sent, expiration_ts_ms) +CREATE UNIQUE INDEX account_validity_renewal_string_idx ON account_validity(renewal_token) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index d3611ed21f..8fb5140a05 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -1,14 +1,22 @@ import datetime import json +import os + +import pkg_resources from synapse.api.constants import LoginType from synapse.api.errors import Codes from synapse.appservice import ApplicationService from synapse.rest.client.v1 import admin, login -from synapse.rest.client.v2_alpha import register, sync +from synapse.rest.client.v2_alpha import account_validity, register, sync from tests import unittest +try: + from synapse.push.mailer import load_jinja2_templates +except ImportError: + load_jinja2_templates = None + class RegisterRestServletTestCase(unittest.HomeserverTestCase): @@ -197,6 +205,7 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): config = self.default_config() + # Test for account expiring after a week. config.enable_registration = True config.account_validity.enabled = True config.account_validity.period = 604800000 # Time in ms for 1 week @@ -228,3 +237,92 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): self.assertEquals( channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT, channel.result, ) + + +class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): + + skip = "No Jinja installed" if not load_jinja2_templates else None + servlets = [ + register.register_servlets, + admin.register_servlets, + login.register_servlets, + sync.register_servlets, + account_validity.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + config = self.default_config() + # Test for account expiring after a week and renewal emails being sent 2 + # days before expiry. + config.enable_registration = True + config.account_validity.enabled = True + config.account_validity.renew_by_email_enabled = True + config.account_validity.period = 604800000 # Time in ms for 1 week + config.account_validity.renew_at = 172800000 # Time in ms for 2 days + config.account_validity.renew_email_subject = "Renew your account" + + # Email config. + self.email_attempts = [] + + def sendmail(*args, **kwargs): + self.email_attempts.append((args, kwargs)) + return + + config.email_template_dir = os.path.abspath( + pkg_resources.resource_filename('synapse', 'res/templates') + ) + config.email_expiry_template_html = "notice_expiry.html" + config.email_expiry_template_text = "notice_expiry.txt" + config.email_smtp_host = "127.0.0.1" + config.email_smtp_port = 20 + config.require_transport_security = False + config.email_smtp_user = None + config.email_smtp_pass = None + config.email_notif_from = "test@example.com" + + self.hs = self.setup_test_homeserver(config=config, sendmail=sendmail) + + self.store = self.hs.get_datastore() + + return self.hs + + def test_renewal_email(self): + user_id = self.register_user("kermit", "monkey") + tok = self.login("kermit", "monkey") + # We need to manually add an email address otherwise the handler will do + # nothing. + now = self.hs.clock.time_msec() + self.get_success(self.store.user_add_threepid( + user_id=user_id, medium="email", address="kermit@example.com", + validated_at=now, added_at=now, + )) + + # The specific endpoint doesn't matter, all we need is an authenticated + # endpoint. + request, channel = self.make_request( + b"GET", "/sync", access_token=tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Move 6 days forward. This should trigger a renewal email to be sent. + self.reactor.advance(datetime.timedelta(days=6).total_seconds()) + self.assertEqual(len(self.email_attempts), 1) + + # Retrieving the URL from the email is too much pain for now, so we + # retrieve the token from the DB. + renewal_token = self.get_success(self.store.get_renewal_token_for_user(user_id)) + url = "/_matrix/client/unstable/account_validity/renew?token=%s" % renewal_token + request, channel = self.make_request(b"GET", url) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Move 3 days forward. If the renewal failed, every authed request with + # our access token should be denied from now, otherwise they should + # succeed. + self.reactor.advance(datetime.timedelta(days=3).total_seconds()) + request, channel = self.make_request( + b"GET", "/sync", access_token=tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) -- cgit 1.5.1 From eaf41a943b2cd3f7f32d142c9552d558eb37a074 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 16 Apr 2019 20:13:59 +0100 Subject: Add management endpoints for account validity --- changelog.d/5073.feature | 1 + docs/admin_api/account_validity.rst | 42 +++++++++++ synapse/api/auth.py | 2 +- synapse/handlers/account_validity.py | 33 +++++++- synapse/rest/client/v1/admin.py | 39 ++++++++++ synapse/rest/client/v2_alpha/account_validity.py | 31 +++++++- synapse/storage/registration.py | 29 +++++--- tests/rest/client/v2_alpha/test_register.py | 95 ++++++++++++++++++++++-- 8 files changed, 246 insertions(+), 26 deletions(-) create mode 100644 changelog.d/5073.feature create mode 100644 docs/admin_api/account_validity.rst (limited to 'synapse/storage') diff --git a/changelog.d/5073.feature b/changelog.d/5073.feature new file mode 100644 index 0000000000..12766a82a7 --- /dev/null +++ b/changelog.d/5073.feature @@ -0,0 +1 @@ +Add time-based account expiration. diff --git a/docs/admin_api/account_validity.rst b/docs/admin_api/account_validity.rst new file mode 100644 index 0000000000..980ea23605 --- /dev/null +++ b/docs/admin_api/account_validity.rst @@ -0,0 +1,42 @@ +Account validity API +==================== + +This API allows a server administrator to manage the validity of an account. To +use it, you must enable the account validity feature (under +``account_validity``) in Synapse's configuration. + +Renew account +------------- + +This API extends the validity of an account by as much time as configured in the +``period`` parameter from the ``account_validity`` configuration. + +The API is:: + + POST /_matrix/client/unstable/account_validity/send_mail + +with the following body: + +.. code:: json + + { + "user_id": "", + "expiration_ts": 0, + "enable_renewal_emails": true + } + + +``expiration_ts`` is an optional parameter and overrides the expiration date, +which otherwise defaults to now + validity period. + +``enable_renewal_emails`` is also an optional parameter and enables/disables +sending renewal emails to the user. Defaults to true. + +The API returns with the new expiration date for this account, as a timestamp in +milliseconds since epoch: + +.. code:: json + + { + "expiration_ts": 0 + } diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 4482962510..960e66dbdc 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -232,7 +232,7 @@ class Auth(object): if self._account_validity.enabled: user_id = user.to_string() expiration_ts = yield self.store.get_expiration_ts_for_user(user_id) - if expiration_ts and self.clock.time_msec() >= expiration_ts: + if expiration_ts is not None and self.clock.time_msec() >= expiration_ts: raise AuthError( 403, "User account has expired", diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index e82049e42d..261446517d 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -90,6 +90,11 @@ class AccountValidityHandler(object): expiration_ts=user["expiration_ts_ms"], ) + @defer.inlineCallbacks + def send_renewal_email_to_user(self, user_id): + expiration_ts = yield self.store.get_expiration_ts_for_user(user_id) + yield self._send_renewal_email(user_id, expiration_ts) + @defer.inlineCallbacks def _send_renewal_email(self, user_id, expiration_ts): """Sends out a renewal email to every email address attached to the given user @@ -217,12 +222,32 @@ class AccountValidityHandler(object): renewal_token (str): Token sent with the renewal request. """ user_id = yield self.store.get_user_from_renewal_token(renewal_token) - logger.debug("Renewing an account for user %s", user_id) + yield self.renew_account_for_user(user_id) - new_expiration_date = self.clock.time_msec() + self._account_validity.period + @defer.inlineCallbacks + def renew_account_for_user(self, user_id, expiration_ts=None, email_sent=False): + """Renews the account attached to a given user by pushing back the + expiration date by the current validity period in the server's + configuration. - yield self.store.renew_account_for_user( + Args: + renewal_token (str): Token sent with the renewal request. + expiration_ts (int): New expiration date. Defaults to now + validity period. + email_sent (bool): Whether an email has been sent for this validity period. + Defaults to False. + + Returns: + defer.Deferred[int]: New expiration date for this account, as a timestamp + in milliseconds since epoch. + """ + if expiration_ts is None: + expiration_ts = self.clock.time_msec() + self._account_validity.period + + yield self.store.set_account_validity_for_user( user_id=user_id, - new_expiration_ts=new_expiration_date, + expiration_ts=expiration_ts, + email_sent=email_sent, ) + + defer.returnValue(expiration_ts) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index e788769639..d27472c538 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -786,6 +786,44 @@ class SearchUsersRestServlet(ClientV1RestServlet): defer.returnValue((200, ret)) +class AccountValidityRenewServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns("/admin/account_validity/validity$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(AccountValidityRenewServlet, self).__init__(hs) + + self.hs = hs + self.account_activity_handler = hs.get_account_validity_handler() + self.auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + body = parse_json_object_from_request(request) + + if "user_id" not in body: + raise SynapseError(400, "Missing property 'user_id' in the request body") + + expiration_ts = yield self.account_activity_handler.renew_account_for_user( + body["user_id"], body.get("expiration_ts"), + not body.get("enable_renewal_emails", True), + ) + + res = { + "expiration_ts": expiration_ts, + } + defer.returnValue((200, res)) + + def register_servlets(hs, http_server): WhoisRestServlet(hs).register(http_server) PurgeMediaCacheRestServlet(hs).register(http_server) @@ -801,3 +839,4 @@ def register_servlets(hs, http_server): ListMediaInRoom(hs).register(http_server) UserRegisterServlet(hs).register(http_server) VersionServlet(hs).register(http_server) + AccountValidityRenewServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/account_validity.py b/synapse/rest/client/v2_alpha/account_validity.py index 1ff6a6b638..fc8dbeb617 100644 --- a/synapse/rest/client/v2_alpha/account_validity.py +++ b/synapse/rest/client/v2_alpha/account_validity.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.errors import SynapseError +from synapse.api.errors import AuthError, SynapseError from synapse.http.server import finish_request from synapse.http.servlet import RestServlet @@ -39,6 +39,7 @@ class AccountValidityRenewServlet(RestServlet): self.hs = hs self.account_activity_handler = hs.get_account_validity_handler() + self.auth = hs.get_auth() @defer.inlineCallbacks def on_GET(self, request): @@ -58,5 +59,33 @@ class AccountValidityRenewServlet(RestServlet): defer.returnValue(None) +class AccountValiditySendMailServlet(RestServlet): + PATTERNS = client_v2_patterns("/account_validity/send_mail$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(AccountValiditySendMailServlet, self).__init__() + + self.hs = hs + self.account_activity_handler = hs.get_account_validity_handler() + self.auth = hs.get_auth() + self.account_validity = self.hs.config.account_validity + + @defer.inlineCallbacks + def on_POST(self, request): + if not self.account_validity.renew_by_email_enabled: + raise AuthError(403, "Account renewal via email is disabled on this server.") + + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() + yield self.account_activity_handler.send_renewal_email_to_user(user_id) + + defer.returnValue((200, {})) + + def register_servlets(hs, http_server): AccountValidityRenewServlet(hs).register(http_server) + AccountValiditySendMailServlet(hs).register(http_server) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index a78850259f..dfdb4e7e34 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -108,25 +108,30 @@ class RegistrationWorkerStore(SQLBaseStore): defer.returnValue(res) @defer.inlineCallbacks - def renew_account_for_user(self, user_id, new_expiration_ts): - """Updates the account validity table with a new timestamp for a given - user, removes the existing renewal token from this user, and unsets the - flag indicating that an email has been sent for renewing this account. + def set_account_validity_for_user(self, user_id, expiration_ts, email_sent, + renewal_token=None): + """Updates the account validity properties of the given account, with the + given values. Args: - user_id (str): ID of the user whose account validity to renew. - new_expiration_ts: New expiration date, as a timestamp in milliseconds + user_id (str): ID of the account to update properties for. + expiration_ts (int): New expiration date, as a timestamp in milliseconds since epoch. + email_sent (bool): True means a renewal email has been sent for this + account and there's no need to send another one for the current validity + period. + renewal_token (str): Renewal token the user can use to extend the validity + of their account. Defaults to no token. """ - def renew_account_for_user_txn(txn): + def set_account_validity_for_user_txn(txn): self._simple_update_txn( txn=txn, table="account_validity", keyvalues={"user_id": user_id}, updatevalues={ - "expiration_ts_ms": new_expiration_ts, - "email_sent": False, - "renewal_token": None, + "expiration_ts_ms": expiration_ts, + "email_sent": email_sent, + "renewal_token": renewal_token, }, ) self._invalidate_cache_and_stream( @@ -134,8 +139,8 @@ class RegistrationWorkerStore(SQLBaseStore): ) yield self.runInteraction( - "renew_account_for_user", - renew_account_for_user_txn, + "set_account_validity_for_user", + set_account_validity_for_user_txn, ) @defer.inlineCallbacks diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 8fb5140a05..3d44667489 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -201,6 +201,7 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin.register_servlets, login.register_servlets, sync.register_servlets, + account_validity.register_servlets, ] def make_homeserver(self, reactor, clock): @@ -238,6 +239,68 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT, channel.result, ) + def test_manual_renewal(self): + user_id = self.register_user("kermit", "monkey") + tok = self.login("kermit", "monkey") + + self.reactor.advance(datetime.timedelta(weeks=1).total_seconds()) + + # If we register the admin user at the beginning of the test, it will + # expire at the same time as the normal user and the renewal request + # will be denied. + self.register_user("admin", "adminpassword", admin=True) + admin_tok = self.login("admin", "adminpassword") + + url = "/_matrix/client/unstable/admin/account_validity/validity" + params = { + "user_id": user_id, + } + request_data = json.dumps(params) + request, channel = self.make_request( + b"POST", url, request_data, access_token=admin_tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + # The specific endpoint doesn't matter, all we need is an authenticated + # endpoint. + request, channel = self.make_request( + b"GET", "/sync", access_token=tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_manual_expire(self): + user_id = self.register_user("kermit", "monkey") + tok = self.login("kermit", "monkey") + + self.register_user("admin", "adminpassword", admin=True) + admin_tok = self.login("admin", "adminpassword") + + url = "/_matrix/client/unstable/admin/account_validity/validity" + params = { + "user_id": user_id, + "expiration_ts": 0, + "enable_renewal_emails": False, + } + request_data = json.dumps(params) + request, channel = self.make_request( + b"POST", url, request_data, access_token=admin_tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + # The specific endpoint doesn't matter, all we need is an authenticated + # endpoint. + request, channel = self.make_request( + b"GET", "/sync", access_token=tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"403", channel.result) + self.assertEquals( + channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT, channel.result, + ) + class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): @@ -287,6 +350,8 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): return self.hs def test_renewal_email(self): + self.email_attempts = [] + user_id = self.register_user("kermit", "monkey") tok = self.login("kermit", "monkey") # We need to manually add an email address otherwise the handler will do @@ -297,14 +362,6 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): validated_at=now, added_at=now, )) - # The specific endpoint doesn't matter, all we need is an authenticated - # endpoint. - request, channel = self.make_request( - b"GET", "/sync", access_token=tok, - ) - self.render(request) - self.assertEquals(channel.result["code"], b"200", channel.result) - # Move 6 days forward. This should trigger a renewal email to be sent. self.reactor.advance(datetime.timedelta(days=6).total_seconds()) self.assertEqual(len(self.email_attempts), 1) @@ -326,3 +383,25 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): ) self.render(request) self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_manual_email_send(self): + self.email_attempts = [] + + user_id = self.register_user("kermit", "monkey") + tok = self.login("kermit", "monkey") + # We need to manually add an email address otherwise the handler will do + # nothing. + now = self.hs.clock.time_msec() + self.get_success(self.store.user_add_threepid( + user_id=user_id, medium="email", address="kermit@example.com", + validated_at=now, added_at=now, + )) + + request, channel = self.make_request( + b"POST", "/_matrix/client/unstable/account_validity/send_mail", + access_token=tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.assertEqual(len(self.email_attempts), 1) -- cgit 1.5.1 From bd0d45ca69587f4f258b738dfa3a55704838081e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 26 Apr 2019 11:13:16 +0100 Subject: Fix infinite loop in presence handler Fixes #5102 --- changelog.d/5103.bugfix | 1 + synapse/handlers/presence.py | 5 +++++ synapse/storage/state_deltas.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 changelog.d/5103.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5103.bugfix b/changelog.d/5103.bugfix new file mode 100644 index 0000000000..590d80d58f --- /dev/null +++ b/changelog.d/5103.bugfix @@ -0,0 +1 @@ +Fix bug where presence updates were sent to all servers in a room when a new server joined, rather than to just the new server. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index bd1285b15c..59d53f1050 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -828,6 +828,11 @@ class PresenceHandler(object): if typ != EventTypes.Member: continue + if event_id is None: + # state has been deleted, so this is not a join. We only care about + # joins. + continue + event = yield self.store.get_event(event_id) if event.content.get("membership") != Membership.JOIN: # We only care about joins diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py index 56e42f583d..31a0279b18 100644 --- a/synapse/storage/state_deltas.py +++ b/synapse/storage/state_deltas.py @@ -22,6 +22,24 @@ logger = logging.getLogger(__name__) class StateDeltasStore(SQLBaseStore): def get_current_state_deltas(self, prev_stream_id): + """Fetch a list of room state changes since the given stream id + + Each entry in the result contains the following fields: + - stream_id (int) + - room_id (str) + - type (str): event type + - state_key (str): + - event_id (str|None): new event_id for this state key. None if the + state has been deleted. + - prev_event_id (str|None): previous event_id for this state key. None + if it's new state. + + Args: + prev_stream_id (int): point to get changes since (exclusive) + + Returns: + Deferred[list[dict]]: results + """ prev_stream_id = int(prev_stream_id) if not self._curr_state_delta_stream_cache.has_any_entity_changed( prev_stream_id -- cgit 1.5.1 From 11ea16777f52264f0a1d6f4db5b7db5c6c147523 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Thu, 9 May 2019 12:01:41 +0200 Subject: Limit the number of EDUs in transactions to 100 as expected by receiver (#5138) Fixes #3951. --- changelog.d/5138.bugfix | 1 + synapse/federation/sender/per_destination_queue.py | 56 ++++++++++++---------- synapse/storage/deviceinbox.py | 2 +- 3 files changed, 32 insertions(+), 27 deletions(-) create mode 100644 changelog.d/5138.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5138.bugfix b/changelog.d/5138.bugfix new file mode 100644 index 0000000000..c1945a8eb2 --- /dev/null +++ b/changelog.d/5138.bugfix @@ -0,0 +1 @@ +Limit the number of EDUs in transactions to 100 as expected by synapse. Thanks to @superboum for this work! diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index be99211003..4d96f026c6 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -33,6 +33,9 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage import UserPresenceState from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter +# This is defined in the Matrix spec and enforced by the receiver. +MAX_EDUS_PER_TRANSACTION = 100 + logger = logging.getLogger(__name__) @@ -197,7 +200,8 @@ class PerDestinationQueue(object): pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( - yield self._get_new_device_messages() + # We have to keep 2 free slots for presence and rr_edus + yield self._get_new_device_messages(MAX_EDUS_PER_TRANSACTION - 2) ) # BEGIN CRITICAL SECTION @@ -216,19 +220,9 @@ class PerDestinationQueue(object): pending_edus = [] - pending_edus.extend(self._get_rr_edus(force_flush=False)) - # We can only include at most 100 EDUs per transactions - pending_edus.extend(self._pop_pending_edus(100 - len(pending_edus))) - - pending_edus.extend( - self._pending_edus_keyed.values() - ) - - self._pending_edus_keyed = {} - - pending_edus.extend(device_message_edus) - + # rr_edus and pending_presence take at most one slot each + pending_edus.extend(self._get_rr_edus(force_flush=False)) pending_presence = self._pending_presence self._pending_presence = {} if pending_presence: @@ -248,6 +242,12 @@ class PerDestinationQueue(object): ) ) + pending_edus.extend(device_message_edus) + pending_edus.extend(self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))) + while len(pending_edus) < MAX_EDUS_PER_TRANSACTION and self._pending_edus_keyed: + _, val = self._pending_edus_keyed.popitem() + pending_edus.append(val) + if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", self._destination, len(pending_pdus)) @@ -259,7 +259,7 @@ class PerDestinationQueue(object): # if we've decided to send a transaction anyway, and we have room, we # may as well send any pending RRs - if len(pending_edus) < 100: + if len(pending_edus) < MAX_EDUS_PER_TRANSACTION: pending_edus.extend(self._get_rr_edus(force_flush=True)) # END CRITICAL SECTION @@ -346,33 +346,37 @@ class PerDestinationQueue(object): return pending_edus @defer.inlineCallbacks - def _get_new_device_messages(self): - last_device_stream_id = self._last_device_stream_id - to_device_stream_id = self._store.get_to_device_stream_token() - contents, stream_id = yield self._store.get_new_device_msgs_for_remote( - self._destination, last_device_stream_id, to_device_stream_id + def _get_new_device_messages(self, limit): + last_device_list = self._last_device_list_stream_id + # Will return at most 20 entries + now_stream_id, results = yield self._store.get_devices_by_remote( + self._destination, last_device_list ) edus = [ Edu( origin=self._server_name, destination=self._destination, - edu_type="m.direct_to_device", + edu_type="m.device_list_update", content=content, ) - for content in contents + for content in results ] - last_device_list = self._last_device_list_stream_id - now_stream_id, results = yield self._store.get_devices_by_remote( - self._destination, last_device_list + assert len(edus) <= limit, "get_devices_by_remote returned too many EDUs" + + last_device_stream_id = self._last_device_stream_id + to_device_stream_id = self._store.get_to_device_stream_token() + contents, stream_id = yield self._store.get_new_device_msgs_for_remote( + self._destination, last_device_stream_id, to_device_stream_id, limit - len(edus) ) edus.extend( Edu( origin=self._server_name, destination=self._destination, - edu_type="m.device_list_update", + edu_type="m.direct_to_device", content=content, ) - for content in results + for content in contents ) + defer.returnValue((edus, stream_id, now_stream_id)) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index fed4ea3610..9b0a99cb49 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -118,7 +118,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): defer.returnValue(count) def get_new_device_msgs_for_remote( - self, destination, last_stream_id, current_stream_id, limit=100 + self, destination, last_stream_id, current_stream_id, limit ): """ Args: -- cgit 1.5.1 From 4fb44fb5b9f0043ab7bfbc0aa7251c92680cc639 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 May 2019 13:37:44 +0100 Subject: Expose DataStore._get_events as get_events_as_list This is in preparation for reaction work which requires it. --- synapse/storage/appservice.py | 4 +-- synapse/storage/event_federation.py | 6 ++--- synapse/storage/events_worker.py | 50 ++++++++++++++++++++++++++----------- synapse/storage/search.py | 4 +-- synapse/storage/stream.py | 18 +++++++------ tests/storage/test_appservice.py | 2 +- 6 files changed, 54 insertions(+), 30 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 6092f600ba..eb329ebd8b 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -302,7 +302,7 @@ class ApplicationServiceTransactionWorkerStore( event_ids = json.loads(entry["event_ids"]) - events = yield self._get_events(event_ids) + events = yield self.get_events_as_list(event_ids) defer.returnValue( AppServiceTransaction(service=service, id=entry["txn_id"], events=events) @@ -358,7 +358,7 @@ class ApplicationServiceTransactionWorkerStore( "get_new_events_for_appservice", get_new_events_for_appservice_txn ) - events = yield self._get_events(event_ids) + events = yield self.get_events_as_list(event_ids) defer.returnValue((upper_bound, events)) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 956f876572..09e39c2c28 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -45,7 +45,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas """ return self.get_auth_chain_ids( event_ids, include_given=include_given - ).addCallback(self._get_events) + ).addCallback(self.get_events_as_list) def get_auth_chain_ids(self, event_ids, include_given=False): """Get auth events for given event_ids. The events *must* be state events. @@ -316,7 +316,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas event_list, limit, ) - .addCallback(self._get_events) + .addCallback(self.get_events_as_list) .addCallback(lambda l: sorted(l, key=lambda e: -e.depth)) ) @@ -382,7 +382,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas latest_events, limit, ) - events = yield self._get_events(ids) + events = yield self.get_events_as_list(ids) defer.returnValue(events) def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limit): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 663991a9b6..5c40056d11 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -103,7 +103,7 @@ class EventsWorkerStore(SQLBaseStore): Returns: Deferred : A FrozenEvent. """ - events = yield self._get_events( + events = yield self.get_events_as_list( [event_id], check_redacted=check_redacted, get_prev_content=get_prev_content, @@ -142,7 +142,7 @@ class EventsWorkerStore(SQLBaseStore): Returns: Deferred : Dict from event_id to event. """ - events = yield self._get_events( + events = yield self.get_events_as_list( event_ids, check_redacted=check_redacted, get_prev_content=get_prev_content, @@ -152,13 +152,32 @@ class EventsWorkerStore(SQLBaseStore): defer.returnValue({e.event_id: e for e in events}) @defer.inlineCallbacks - def _get_events( + def get_events_as_list( self, event_ids, check_redacted=True, get_prev_content=False, allow_rejected=False, ): + """Get events from the database and return in a list in the same order + as given by `event_ids` arg. + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred[list]: List of events fetched from the database. The + events are in the same order as `event_ids` arg. + + Note that the returned list may be smaller than the list of event + IDs if not all events could be fetched. + """ + if not event_ids: defer.returnValue([]) @@ -202,21 +221,22 @@ class EventsWorkerStore(SQLBaseStore): # # The problem is that we end up at this point when an event # which has been redacted is pulled out of the database by - # _enqueue_events, because _enqueue_events needs to check the - # redaction before it can cache the redacted event. So obviously, - # calling get_event to get the redacted event out of the database - # gives us an infinite loop. + # _enqueue_events, because _enqueue_events needs to check + # the redaction before it can cache the redacted event. So + # obviously, calling get_event to get the redacted event out + # of the database gives us an infinite loop. # - # For now (quick hack to fix during 0.99 release cycle), we just - # go and fetch the relevant row from the db, but it would be nice - # to think about how we can cache this rather than hit the db - # every time we access a redaction event. + # For now (quick hack to fix during 0.99 release cycle), we + # just go and fetch the relevant row from the db, but it + # would be nice to think about how we can cache this rather + # than hit the db every time we access a redaction event. # # One thought on how to do this: - # 1. split _get_events up so that it is divided into (a) get the - # rawish event from the db/cache, (b) do the redaction/rejection - # filtering - # 2. have _get_event_from_row just call the first half of that + # 1. split get_events_as_list up so that it is divided into + # (a) get the rawish event from the db/cache, (b) do the + # redaction/rejection filtering + # 2. have _get_event_from_row just call the first half of + # that orig_sender = yield self._simple_select_one_onecol( table="events", diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 226f8f1b7e..ff49eaae02 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -460,7 +460,7 @@ class SearchStore(BackgroundUpdateStore): results = list(filter(lambda row: row["room_id"] in room_ids, results)) - events = yield self._get_events([r["event_id"] for r in results]) + events = yield self.get_events_as_list([r["event_id"] for r in results]) event_map = {ev.event_id: ev for ev in events} @@ -605,7 +605,7 @@ class SearchStore(BackgroundUpdateStore): results = list(filter(lambda row: row["room_id"] in room_ids, results)) - events = yield self._get_events([r["event_id"] for r in results]) + events = yield self.get_events_as_list([r["event_id"] for r in results]) event_map = {ev.event_id: ev for ev in events} diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 9cd1e0f9fe..d105b6b17d 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -319,7 +319,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): rows = yield self.runInteraction("get_room_events_stream_for_room", f) - ret = yield self._get_events([r.event_id for r in rows], get_prev_content=True) + ret = yield self.get_events_as_list([ + r.event_id for r in rows], get_prev_content=True, + ) self._set_before_and_after(ret, rows, topo_order=from_id is None) @@ -367,7 +369,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): rows = yield self.runInteraction("get_membership_changes_for_user", f) - ret = yield self._get_events([r.event_id for r in rows], get_prev_content=True) + ret = yield self.get_events_as_list( + [r.event_id for r in rows], get_prev_content=True, + ) self._set_before_and_after(ret, rows, topo_order=False) @@ -394,7 +398,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) logger.debug("stream before") - events = yield self._get_events( + events = yield self.get_events_as_list( [r.event_id for r in rows], get_prev_content=True ) logger.debug("stream after") @@ -580,11 +584,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_filter, ) - events_before = yield self._get_events( + events_before = yield self.get_events_as_list( [e for e in results["before"]["event_ids"]], get_prev_content=True ) - events_after = yield self._get_events( + events_after = yield self.get_events_as_list( [e for e in results["after"]["event_ids"]], get_prev_content=True ) @@ -697,7 +701,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "get_all_new_events_stream", get_all_new_events_stream_txn ) - events = yield self._get_events(event_ids) + events = yield self.get_events_as_list(event_ids) defer.returnValue((upper_bound, events)) @@ -849,7 +853,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_filter, ) - events = yield self._get_events( + events = yield self.get_events_as_list( [r.event_id for r in rows], get_prev_content=True ) diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 3f0083831b..25a6c89ef5 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -340,7 +340,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): other_events = [Mock(event_id="e5"), Mock(event_id="e6")] # we aren't testing store._base stuff here, so mock this out - self.store._get_events = Mock(return_value=events) + self.store.get_events_as_list = Mock(return_value=events) yield self._insert_txn(self.as_list[1]["id"], 9, other_events) yield self._insert_txn(service.id, 10, events) -- cgit 1.5.1 From 52ddc6c0ed40dfc15e8e4abd383a2a5fc12fcfec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 May 2019 09:52:15 +0100 Subject: Update docstring with correct type Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 5c40056d11..adc6cf26b5 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -171,7 +171,7 @@ class EventsWorkerStore(SQLBaseStore): allow_rejected (bool): If True return rejected events. Returns: - Deferred[list]: List of events fetched from the database. The + Deferred[list[EventBase]]: List of events fetched from the database. The events are in the same order as `event_ids` arg. Note that the returned list may be smaller than the list of event -- cgit 1.5.1 From 54d77107c1fde88333ecccedfe30f26fb7645847 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 May 2019 11:27:50 +0100 Subject: Make generating SQL bounds for pagination generic This will allow us to reuse the same structure when we paginate e.g. relations --- synapse/storage/stream.py | 179 ++++++++++++++++++++++++++++++---------------- 1 file changed, 118 insertions(+), 61 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index d105b6b17d..163363c0c2 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -64,59 +64,120 @@ _EventDictReturn = namedtuple( ) -def lower_bound(token, engine, inclusive=False): - inclusive = "=" if inclusive else "" - if token.topological is None: - return "(%d <%s %s)" % (token.stream, inclusive, "stream_ordering") - else: - if isinstance(engine, PostgresEngine): - # Postgres doesn't optimise ``(x < a) OR (x=a AND y= (topological_ordering, stream_ordering) + AND (5, 3) < (topological_ordering, stream_ordering) + + would be generated for dir=b, from_token=(6, 7) and to_token=(5, 3). + + Args: + direction (str): Whether we're paginating backwards("b") or + forwards ("f"). + column_names (tuple[str, str]): The column names to bound. Must *not* + be user defined as these get inserted directly into the SQL + statement without escapes. + from_token (tuple[int, int]|None) + to_token (tuple[int, int]|None) + engine: The database engine to generate the clauses for + + Returns: + str: The sql expression + """ + assert direction in ("b", "f") + + where_clause = [] + if from_token: + where_clause.append( + _make_generic_sql_bound( + bound=">=" if direction == "b" else "<", + column_names=column_names, + values=from_token, + engine=engine, ) - return "(%d < %s OR (%d = %s AND %d <%s %s))" % ( - token.topological, - "topological_ordering", - token.topological, - "topological_ordering", - token.stream, - inclusive, - "stream_ordering", - ) - - -def upper_bound(token, engine, inclusive=True): - inclusive = "=" if inclusive else "" - if token.topological is None: - return "(%d >%s %s)" % (token.stream, inclusive, "stream_ordering") - else: - if isinstance(engine, PostgresEngine): - # Postgres doesn't optimise ``(x > a) OR (x=a AND y>b)`` as well - # as it optimises ``(x,y) > (a,b)`` on multicolumn indexes. So we - # use the later form when running against postgres. - return "((%d,%d) >%s (%s,%s))" % ( - token.topological, - token.stream, - inclusive, - "topological_ordering", - "stream_ordering", + ) + + if to_token: + where_clause.append( + _make_generic_sql_bound( + bound="<" if direction == "b" else ">=", + column_names=column_names, + values=to_token, + engine=engine, ) - return "(%d > %s OR (%d = %s AND %d >%s %s))" % ( - token.topological, - "topological_ordering", - token.topological, - "topological_ordering", - token.stream, - inclusive, - "stream_ordering", ) + return " AND ".join(where_clause) + + +def _make_generic_sql_bound(bound, column_names, values, engine): + """Create an SQL expression that bounds the given column names by the + values, e.g. create the equivalent of `(1, 2) < (col1, col2)`. + + Only works with two columns. + + Older versions of SQLite don't support that syntax so we have to expand it + out manually. + + Args: + bound (str): The comparison operator to use. One of ">", "<", ">=", + "<=", where the values are on the left and columns on the right. + names (tuple[str, str]): The column names. Must *not* be user defined + as these get inserted directly into the SQL statement without + escapes. + values (tuple[int, int]): The values to bound the columns by. + engine: The database engine to generate the SQL for + + Returns: + str + """ + + assert(bound in (">", "<", ">=", "<=")) + + name1, name2 = column_names + val1, val2 = values + + if val1 is None: + val2 = int(val2) + return "(%d %s %s)" % (val2, bound, name2) + + val1 = int(val1) + val2 = int(val2) + + if isinstance(engine, PostgresEngine): + # Postgres doesn't optimise ``(x < a) OR (x=a AND y Date: Tue, 14 May 2019 16:59:21 +0100 Subject: Add simple send_relation API and track in DB --- synapse/api/constants.py | 8 ++ synapse/rest/__init__.py | 2 + synapse/rest/client/v2_alpha/relations.py | 110 ++++++++++++++++++++++++++ synapse/storage/__init__.py | 2 + synapse/storage/events.py | 2 + synapse/storage/relations.py | 62 +++++++++++++++ synapse/storage/schema/delta/54/relations.sql | 27 +++++++ tests/rest/client/v2_alpha/test_relations.py | 98 +++++++++++++++++++++++ 8 files changed, 311 insertions(+) create mode 100644 synapse/rest/client/v2_alpha/relations.py create mode 100644 synapse/storage/relations.py create mode 100644 synapse/storage/schema/delta/54/relations.sql create mode 100644 tests/rest/client/v2_alpha/test_relations.py (limited to 'synapse/storage') diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 8547a63535..30bebd749f 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -116,3 +116,11 @@ class UserTypes(object): """ SUPPORT = "support" ALL_USER_TYPES = (SUPPORT,) + + +class RelationTypes(object): + """The types of relations known to this server. + """ + ANNOTATION = "m.annotation" + REPLACES = "m.replaces" + REFERENCES = "m.references" diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 3a24d31d1b..e6110ad9b1 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -44,6 +44,7 @@ from synapse.rest.client.v2_alpha import ( read_marker, receipts, register, + relations, report_event, room_keys, room_upgrade_rest_servlet, @@ -115,6 +116,7 @@ class ClientRestResource(JsonResource): room_upgrade_rest_servlet.register_servlets(hs, client_resource) capabilities.register_servlets(hs, client_resource) account_validity.register_servlets(hs, client_resource) + relations.register_servlets(hs, client_resource) # moving to /_synapse/admin synapse.rest.admin.register_servlets_for_client_rest_resource( diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py new file mode 100644 index 0000000000..b504b4a8be --- /dev/null +++ b/synapse/rest/client/v2_alpha/relations.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This class implements the proposed relation APIs from MSC 1849. + +Since the MSC has not been approved all APIs here are unstable and may change at +any time to reflect changes in the MSC. +""" + +import logging + +from twisted.internet import defer + +from synapse.api.constants import EventTypes +from synapse.api.errors import SynapseError +from synapse.http.servlet import ( + RestServlet, + parse_json_object_from_request, + parse_string, +) + +from ._base import client_v2_patterns + +logger = logging.getLogger(__name__) + + +class RelationSendServlet(RestServlet): + """Helper API for sending events that have relation data. + + Example API shape to send a 👍 reaction to a room: + + POST /rooms/!foo/send_relation/$bar/m.annotation/m.reaction?key=%F0%9F%91%8D + {} + + { + "event_id": "$foobar" + } + """ + + PATTERN = ( + "/rooms/(?P[^/]*)/send_relation" + "/(?P[^/]*)/(?P[^/]*)/(?P[^/]*)" + ) + + def __init__(self, hs): + super(RelationSendServlet, self).__init__() + self.auth = hs.get_auth() + self.event_creation_handler = hs.get_event_creation_handler() + + def register(self, http_server): + http_server.register_paths( + "POST", + client_v2_patterns(self.PATTERN + "$", releases=()), + self.on_PUT_or_POST, + ) + http_server.register_paths( + "PUT", + client_v2_patterns(self.PATTERN + "/(?P[^/]*)$", releases=()), + self.on_PUT_or_POST, + ) + + @defer.inlineCallbacks + def on_PUT_or_POST( + self, request, room_id, parent_id, relation_type, event_type, txn_id=None + ): + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + + if event_type == EventTypes.Member: + # Add relations to a membership is meaningless, so we just deny it + # at the CS API rather than trying to handle it correctly. + raise SynapseError(400, "Cannot send member events with relations") + + content = parse_json_object_from_request(request) + + aggregation_key = parse_string(request, "key", encoding="utf-8") + + content["m.relates_to"] = { + "event_id": parent_id, + "key": aggregation_key, + "rel_type": relation_type, + } + + event_dict = { + "type": event_type, + "content": content, + "room_id": room_id, + "sender": requester.user.to_string(), + } + + event = yield self.event_creation_handler.create_and_send_nonmember_event( + requester, event_dict=event_dict, txn_id=txn_id + ) + + defer.returnValue((200, {"event_id": event.event_id})) + + +def register_servlets(hs, http_server): + RelationSendServlet(hs).register(http_server) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index c432041b4e..7522d3fd57 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -49,6 +49,7 @@ from .pusher import PusherStore from .receipts import ReceiptsStore from .registration import RegistrationStore from .rejections import RejectionsStore +from .relations import RelationsStore from .room import RoomStore from .roommember import RoomMemberStore from .search import SearchStore @@ -99,6 +100,7 @@ class DataStore( GroupServerStore, UserErasureStore, MonthlyActiveUsersStore, + RelationsStore, ): def __init__(self, db_conn, hs): self.hs = hs diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a7f841c6c..6802bf42ce 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1351,6 +1351,8 @@ class EventsStore( # Insert into the event_search table. self._store_guest_access_txn(txn, event) + self._handle_event_relations(txn, event) + # Insert into the room_memberships table. self._store_room_members_txn( txn, diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py new file mode 100644 index 0000000000..a4905162e0 --- /dev/null +++ b/synapse/storage/relations.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.api.constants import RelationTypes +from synapse.storage._base import SQLBaseStore + +logger = logging.getLogger(__name__) + + +class RelationsStore(SQLBaseStore): + def _handle_event_relations(self, txn, event): + """Handles inserting relation data during peristence of events + + Args: + txn + event (EventBase) + """ + relation = event.content.get("m.relates_to") + if not relation: + # No relations + return + + rel_type = relation.get("rel_type") + if rel_type not in ( + RelationTypes.ANNOTATION, + RelationTypes.REFERENCES, + RelationTypes.REPLACES, + ): + # Unknown relation type + return + + parent_id = relation.get("event_id") + if not parent_id: + # Invalid relation + return + + aggregation_key = relation.get("key") + + self._simple_insert_txn( + txn, + table="event_relations", + values={ + "event_id": event.event_id, + "relates_to_id": parent_id, + "relation_type": rel_type, + "aggregation_key": aggregation_key, + }, + ) diff --git a/synapse/storage/schema/delta/54/relations.sql b/synapse/storage/schema/delta/54/relations.sql new file mode 100644 index 0000000000..134862b870 --- /dev/null +++ b/synapse/storage/schema/delta/54/relations.sql @@ -0,0 +1,27 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Tracks related events, like reactions, replies, edits, etc. Note that things +-- in this table are not necessarily "valid", e.g. it may contain edits from +-- people who don't have power to edit other peoples events. +CREATE TABLE IF NOT EXISTS event_relations ( + event_id TEXT NOT NULL, + relates_to_id TEXT NOT NULL, + relation_type TEXT NOT NULL, + aggregation_key TEXT +); + +CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id); +CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key); diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py new file mode 100644 index 0000000000..61163d5b26 --- /dev/null +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six + +from synapse.api.constants import EventTypes, RelationTypes +from synapse.rest.client.v1 import login, room +from synapse.rest.client.v2_alpha import relations + +from tests import unittest + + +class RelationsTestCase(unittest.HomeserverTestCase): + user_id = "@alice:test" + servlets = [ + relations.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.room = self.helper.create_room_as(self.user_id) + res = self.helper.send(self.room, body="Hi!") + self.parent_id = res["event_id"] + + def test_send_relation(self): + """Tests that sending a relation using the new /send_relation works + creates the right shape of event. + """ + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="👍") + self.assertEquals(200, channel.code, channel.json_body) + + event_id = channel.json_body["event_id"] + + request, channel = self.make_request( + "GET", "/rooms/%s/event/%s" % (self.room, event_id) + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assert_dict( + { + "type": "m.reaction", + "sender": self.user_id, + "content": { + "m.relates_to": { + "event_id": self.parent_id, + "key": u"👍", + "rel_type": RelationTypes.ANNOTATION, + } + }, + }, + channel.json_body, + ) + + def test_deny_membership(self): + """Test that we deny relations on membership events + """ + channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) + self.assertEquals(400, channel.code, channel.json_body) + + def _send_relation(self, relation_type, event_type, key=None): + """Helper function to send a relation pointing at `self.parent_id` + + Args: + relation_type (str): One of `RelationTypes` + event_type (str): The type of the event to create + key (str|None): The aggregation key used for m.annotation relation + type. + + Returns: + FakeChannel + """ + query = "" + if key: + query = "?key=" + six.moves.urllib.parse.quote_plus(key) + + request, channel = self.make_request( + "POST", + "/_matrix/client/unstable/rooms/%s/send_relation/%s/%s/%s%s" + % (self.room, self.parent_id, relation_type, event_type, query), + b"{}", + ) + self.render(request) + return channel -- cgit 1.5.1 From b50641e357f6139b0807df3714440a8ccc21d628 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 May 2019 16:59:21 +0100 Subject: Add simple pagination API --- synapse/rest/client/v2_alpha/relations.py | 50 +++++++++++++++++ synapse/storage/relations.py | 80 ++++++++++++++++++++++++++++ tests/rest/client/v2_alpha/test_relations.py | 30 +++++++++++ 3 files changed, 160 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index b504b4a8be..bac9e85c21 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -27,6 +27,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.http.servlet import ( RestServlet, + parse_integer, parse_json_object_from_request, parse_string, ) @@ -106,5 +107,54 @@ class RelationSendServlet(RestServlet): defer.returnValue((200, {"event_id": event.event_id})) +class RelationPaginationServlet(RestServlet): + """API to paginate relations on an event by topological ordering, optionally + filtered by relation type and event type. + """ + + PATTERNS = client_v2_patterns( + "/rooms/(?P[^/]*)/relations/(?P[^/]*)" + "(/(?P[^/]*)(/(?P[^/]*))?)?$", + releases=(), + ) + + def __init__(self, hs): + super(RelationPaginationServlet, self).__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self._event_serializer = hs.get_event_client_serializer() + + @defer.inlineCallbacks + def on_GET(self, request, room_id, parent_id, relation_type=None, event_type=None): + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + + yield self.auth.check_in_room_or_world_readable( + room_id, requester.user.to_string() + ) + + limit = parse_integer(request, "limit", default=5) + + result = yield self.store.get_relations_for_event( + event_id=parent_id, + relation_type=relation_type, + event_type=event_type, + limit=limit, + ) + + events = yield self.store.get_events_as_list( + [c["event_id"] for c in result.chunk] + ) + + now = self.clock.time_msec() + events = yield self._event_serializer.serialize_events(events, now) + + return_value = result.to_dict() + return_value["chunk"] = events + + defer.returnValue((200, return_value)) + + def register_servlets(hs, http_server): RelationSendServlet(hs).register(http_server) + RelationPaginationServlet(hs).register(http_server) diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index a4905162e0..f304b8a9a4 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -15,13 +15,93 @@ import logging +import attr + from synapse.api.constants import RelationTypes from synapse.storage._base import SQLBaseStore logger = logging.getLogger(__name__) +@attr.s +class PaginationChunk(object): + """Returned by relation pagination APIs. + + Attributes: + chunk (list): The rows returned by pagination + """ + + chunk = attr.ib() + + def to_dict(self): + d = {"chunk": self.chunk} + + return d + + class RelationsStore(SQLBaseStore): + def get_relations_for_event( + self, event_id, relation_type=None, event_type=None, limit=5, direction="b" + ): + """Get a list of relations for an event, ordered by topological ordering. + + Args: + event_id (str): Fetch events that relate to this event ID. + relation_type (str|None): Only fetch events with this relation + type, if given. + event_type (str|None): Only fetch events with this event type, if + given. + limit (int): Only fetch the most recent `limit` events. + direction (str): Whether to fetch the most recent first (`"b"`) or + the oldest first (`"f"`). + + Returns: + Deferred[PaginationChunk]: List of event IDs that match relations + requested. The rows are of the form `{"event_id": "..."}`. + """ + + # TODO: Pagination tokens + + where_clause = ["relates_to_id = ?"] + where_args = [event_id] + + if relation_type: + where_clause.append("relation_type = ?") + where_args.append(relation_type) + + if event_type: + where_clause.append("type = ?") + where_args.append(event_type) + + order = "ASC" + if direction == "b": + order = "DESC" + + sql = """ + SELECT event_id FROM event_relations + INNER JOIN events USING (event_id) + WHERE %s + ORDER BY topological_ordering %s, stream_ordering %s + LIMIT ? + """ % ( + " AND ".join(where_clause), + order, + order, + ) + + def _get_recent_references_for_event_txn(txn): + txn.execute(sql, where_args + [limit + 1]) + + events = [{"event_id": row[0]} for row in txn] + + return PaginationChunk( + chunk=list(events[:limit]), + ) + + return self.runInteraction( + "get_recent_references_for_event", _get_recent_references_for_event_txn + ) + def _handle_event_relations(self, txn, event): """Handles inserting relation data during peristence of events diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index 61163d5b26..bcc1c1bb85 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -72,6 +72,36 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) self.assertEquals(400, channel.code, channel.json_body) + def test_paginate(self): + """Tests that calling pagination API corectly the latest relations. + """ + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction") + self.assertEquals(200, channel.code, channel.json_body) + annotation_id = channel.json_body["event_id"] + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/relations/%s?limit=1" + % (self.room, self.parent_id), + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + # We expect to get back a single pagination result, which is the full + # relation event we sent above. + self.assertEquals(len(channel.json_body["chunk"]), 1, channel.json_body) + self.assert_dict( + { + "event_id": annotation_id, + "sender": self.user_id, + "type": "m.reaction", + }, + channel.json_body["chunk"][0], + ) + def _send_relation(self, relation_type, event_type, key=None): """Helper function to send a relation pointing at `self.parent_id` -- cgit 1.5.1 From 5be34fc3e3045b3ba5a97b73ba0f25887874e622 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 May 2019 17:30:23 +0100 Subject: Actually check for None rather falsey --- synapse/storage/relations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index f304b8a9a4..31ef6679af 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -65,11 +65,11 @@ class RelationsStore(SQLBaseStore): where_clause = ["relates_to_id = ?"] where_args = [event_id] - if relation_type: + if relation_type is not None: where_clause.append("relation_type = ?") where_args.append(relation_type) - if event_type: + if event_type is not None: where_clause.append("type = ?") where_args.append(event_type) -- cgit 1.5.1 From a0603523d2e210cf59f887bd75e1a755720cb7a8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 May 2019 16:59:21 +0100 Subject: Add aggregations API --- synapse/config/server.py | 5 + synapse/events/utils.py | 34 +++- synapse/rest/client/v2_alpha/relations.py | 141 ++++++++++++++- synapse/storage/relations.py | 225 +++++++++++++++++++++++- tests/rest/client/v2_alpha/test_relations.py | 251 ++++++++++++++++++++++++++- 5 files changed, 643 insertions(+), 13 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/config/server.py b/synapse/config/server.py index 7874cd9da7..f403477b54 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -100,6 +100,11 @@ class ServerConfig(Config): "block_non_admin_invites", False, ) + # Whether to enable experimental MSC1849 (aka relations) support + self.experimental_msc1849_support_enabled = config.get( + "experimental_msc1849_support_enabled", False, + ) + # Options to control access by tracking MAU self.limit_usage_by_mau = config.get("limit_usage_by_mau", False) self.max_mau_value = 0 diff --git a/synapse/events/utils.py b/synapse/events/utils.py index a5454556cc..16d0c64372 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -21,7 +21,7 @@ from frozendict import frozendict from twisted.internet import defer -from synapse.api.constants import EventTypes +from synapse.api.constants import EventTypes, RelationTypes from synapse.util.async_helpers import yieldable_gather_results from . import EventBase @@ -324,8 +324,12 @@ class EventClientSerializer(object): """ def __init__(self, hs): - pass + self.store = hs.get_datastore() + self.experimental_msc1849_support_enabled = ( + hs.config.experimental_msc1849_support_enabled + ) + @defer.inlineCallbacks def serialize_event(self, event, time_now, **kwargs): """Serializes a single event. @@ -337,8 +341,32 @@ class EventClientSerializer(object): Returns: Deferred[dict]: The serialized event """ + # To handle the case of presence events and the like + if not isinstance(event, EventBase): + defer.returnValue(event) + + event_id = event.event_id event = serialize_event(event, time_now, **kwargs) - return defer.succeed(event) + + # If MSC1849 is enabled then we need to look if thre are any relations + # we need to bundle in with the event + if self.experimental_msc1849_support_enabled: + annotations = yield self.store.get_aggregation_groups_for_event( + event_id, + ) + references = yield self.store.get_relations_for_event( + event_id, RelationTypes.REFERENCES, direction="f", + ) + + if annotations.chunk: + r = event["unsigned"].setdefault("m.relations", {}) + r[RelationTypes.ANNOTATION] = annotations.to_dict() + + if references.chunk: + r = event["unsigned"].setdefault("m.relations", {}) + r[RelationTypes.REFERENCES] = references.to_dict() + + defer.returnValue(event) def serialize_events(self, events, time_now, **kwargs): """Serializes multiple events. diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index c3ac73b8c7..f468409b68 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -23,7 +23,7 @@ import logging from twisted.internet import defer -from synapse.api.constants import EventTypes +from synapse.api.constants import EventTypes, RelationTypes from synapse.api.errors import SynapseError from synapse.http.servlet import ( RestServlet, @@ -141,12 +141,16 @@ class RelationPaginationServlet(RestServlet): ) limit = parse_integer(request, "limit", default=5) + from_token = parse_string(request, "from") + to_token = parse_string(request, "to") result = yield self.store.get_relations_for_event( event_id=parent_id, relation_type=relation_type, event_type=event_type, limit=limit, + from_token=from_token, + to_token=to_token, ) events = yield self.store.get_events_as_list( @@ -162,6 +166,141 @@ class RelationPaginationServlet(RestServlet): defer.returnValue((200, return_value)) +class RelationAggregationPaginationServlet(RestServlet): + """API to paginate aggregation groups of relations, e.g. paginate the + types and counts of the reactions on the events. + + Example request and response: + + GET /rooms/{room_id}/aggregations/{parent_id} + + { + chunk: [ + { + "type": "m.reaction", + "key": "👍", + "count": 3 + } + ] + } + """ + + PATTERNS = client_v2_patterns( + "/rooms/(?P[^/]*)/aggregations/(?P[^/]*)" + "(/(?P[^/]*)(/(?P[^/]*))?)?$", + releases=(), + ) + + def __init__(self, hs): + super(RelationAggregationPaginationServlet, self).__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id, parent_id, relation_type=None, event_type=None): + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + + yield self.auth.check_in_room_or_world_readable( + room_id, requester.user.to_string() + ) + + if relation_type not in (RelationTypes.ANNOTATION, None): + raise SynapseError(400, "Relation type must be 'annotation'") + + limit = parse_integer(request, "limit", default=5) + from_token = parse_string(request, "from") + to_token = parse_string(request, "to") + + res = yield self.store.get_aggregation_groups_for_event( + event_id=parent_id, + event_type=event_type, + limit=limit, + from_token=from_token, + to_token=to_token, + ) + + defer.returnValue((200, res.to_dict())) + + +class RelationAggregationGroupPaginationServlet(RestServlet): + """API to paginate within an aggregation group of relations, e.g. paginate + all the 👍 reactions on an event. + + Example request and response: + + GET /rooms/{room_id}/aggregations/{parent_id}/m.annotation/m.reaction/👍 + + { + chunk: [ + { + "type": "m.reaction", + "content": { + "m.relates_to": { + "rel_type": "m.annotation", + "key": "👍" + } + } + }, + ... + ] + } + """ + + PATTERNS = client_v2_patterns( + "/rooms/(?P[^/]*)/aggregations/(?P[^/]*)" + "/(?P[^/]*)/(?P[^/]*)/(?P[^/]*)$", + releases=(), + ) + + def __init__(self, hs): + super(RelationAggregationGroupPaginationServlet, self).__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self._event_serializer = hs.get_event_client_serializer() + + @defer.inlineCallbacks + def on_GET(self, request, room_id, parent_id, relation_type, event_type, key): + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + + yield self.auth.check_in_room_or_world_readable( + room_id, requester.user.to_string() + ) + + if relation_type != RelationTypes.ANNOTATION: + raise SynapseError(400, "Relation type must be 'annotation'") + + limit = parse_integer(request, "limit", default=5) + from_token = parse_string(request, "from") + to_token = parse_string(request, "to") + + result = yield self.store.get_relations_for_event( + event_id=parent_id, + relation_type=relation_type, + event_type=event_type, + aggregation_key=key, + limit=limit, + from_token=from_token, + to_token=to_token, + ) + + events = yield self.store.get_events_as_list( + [c["event_id"] for c in result.chunk] + ) + + now = self.clock.time_msec() + events = yield self._event_serializer.serialize_events(events, now) + + return_value = result.to_dict() + return_value["chunk"] = events + + defer.returnValue((200, return_value)) + + defer.returnValue((200, return_value)) + + def register_servlets(hs, http_server): RelationSendServlet(hs).register(http_server) RelationPaginationServlet(hs).register(http_server) + RelationAggregationPaginationServlet(hs).register(http_server) + RelationAggregationGroupPaginationServlet(hs).register(http_server) diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 31ef6679af..db4b842c97 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -18,7 +18,9 @@ import logging import attr from synapse.api.constants import RelationTypes +from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore +from synapse.storage.stream import generate_pagination_where_clause logger = logging.getLogger(__name__) @@ -29,19 +31,94 @@ class PaginationChunk(object): Attributes: chunk (list): The rows returned by pagination + next_batch (Any|None): Token to fetch next set of results with, if + None then there are no more results. + prev_batch (Any|None): Token to fetch previous set of results with, if + None then there are no previous results. """ chunk = attr.ib() + next_batch = attr.ib(default=None) + prev_batch = attr.ib(default=None) def to_dict(self): d = {"chunk": self.chunk} + if self.next_batch: + d["next_batch"] = self.next_batch.to_string() + + if self.prev_batch: + d["prev_batch"] = self.prev_batch.to_string() + return d +@attr.s +class RelationPaginationToken(object): + """Pagination token for relation pagination API. + + As the results are order by topological ordering, we can use the + `topological_ordering` and `stream_ordering` fields of the events at the + boundaries of the chunk as pagination tokens. + + Attributes: + topological (int): The topological ordering of the boundary event + stream (int): The stream ordering of the boundary event. + """ + + topological = attr.ib() + stream = attr.ib() + + @staticmethod + def from_string(string): + try: + t, s = string.split("-") + return RelationPaginationToken(int(t), int(s)) + except ValueError: + raise SynapseError(400, "Invalid token") + + def to_string(self): + return "%d-%d" % (self.topological, self.stream) + + +@attr.s +class AggregationPaginationToken(object): + """Pagination token for relation aggregation pagination API. + + As the results are order by count and then MAX(stream_ordering) of the + aggregation groups, we can just use them as our pagination token. + + Attributes: + count (int): The count of relations in the boundar group. + stream (int): The MAX stream ordering in the boundary group. + """ + + count = attr.ib() + stream = attr.ib() + + @staticmethod + def from_string(string): + try: + c, s = string.split("-") + return AggregationPaginationToken(int(c), int(s)) + except ValueError: + raise SynapseError(400, "Invalid token") + + def to_string(self): + return "%d-%d" % (self.count, self.stream) + + class RelationsStore(SQLBaseStore): def get_relations_for_event( - self, event_id, relation_type=None, event_type=None, limit=5, direction="b" + self, + event_id, + relation_type=None, + event_type=None, + aggregation_key=None, + limit=5, + direction="b", + from_token=None, + to_token=None, ): """Get a list of relations for an event, ordered by topological ordering. @@ -51,16 +128,26 @@ class RelationsStore(SQLBaseStore): type, if given. event_type (str|None): Only fetch events with this event type, if given. + aggregation_key (str|None): Only fetch events with this aggregation + key, if given. limit (int): Only fetch the most recent `limit` events. direction (str): Whether to fetch the most recent first (`"b"`) or the oldest first (`"f"`). + from_token (RelationPaginationToken|None): Fetch rows from the given + token, or from the start if None. + to_token (RelationPaginationToken|None): Fetch rows up to the given + token, or up to the end if None. Returns: Deferred[PaginationChunk]: List of event IDs that match relations requested. The rows are of the form `{"event_id": "..."}`. """ - # TODO: Pagination tokens + if from_token: + from_token = RelationPaginationToken.from_string(from_token) + + if to_token: + to_token = RelationPaginationToken.from_string(to_token) where_clause = ["relates_to_id = ?"] where_args = [event_id] @@ -73,12 +160,29 @@ class RelationsStore(SQLBaseStore): where_clause.append("type = ?") where_args.append(event_type) - order = "ASC" + if aggregation_key: + where_clause.append("aggregation_key = ?") + where_args.append(aggregation_key) + + pagination_clause = generate_pagination_where_clause( + direction=direction, + column_names=("topological_ordering", "stream_ordering"), + from_token=attr.astuple(from_token) if from_token else None, + to_token=attr.astuple(to_token) if to_token else None, + engine=self.database_engine, + ) + + if pagination_clause: + where_clause.append(pagination_clause) + if direction == "b": order = "DESC" + else: + order = "ASC" sql = """ - SELECT event_id FROM event_relations + SELECT event_id, topological_ordering, stream_ordering + FROM event_relations INNER JOIN events USING (event_id) WHERE %s ORDER BY topological_ordering %s, stream_ordering %s @@ -92,16 +196,125 @@ class RelationsStore(SQLBaseStore): def _get_recent_references_for_event_txn(txn): txn.execute(sql, where_args + [limit + 1]) - events = [{"event_id": row[0]} for row in txn] + last_topo_id = None + last_stream_id = None + events = [] + for row in txn: + events.append({"event_id": row[0]}) + last_topo_id = row[1] + last_stream_id = row[2] + + next_batch = None + if len(events) > limit and last_topo_id and last_stream_id: + next_batch = RelationPaginationToken(last_topo_id, last_stream_id) return PaginationChunk( - chunk=list(events[:limit]), + chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token ) return self.runInteraction( "get_recent_references_for_event", _get_recent_references_for_event_txn ) + def get_aggregation_groups_for_event( + self, + event_id, + event_type=None, + limit=5, + direction="b", + from_token=None, + to_token=None, + ): + """Get a list of annotations on the event, grouped by event type and + aggregation key, sorted by count. + + This is used e.g. to get the what and how many reactions have happend + on an event. + + Args: + event_id (str): Fetch events that relate to this event ID. + event_type (str|None): Only fetch events with this event type, if + given. + limit (int): Only fetch the `limit` groups. + direction (str): Whether to fetch the highest count first (`"b"`) or + the lowest count first (`"f"`). + from_token (AggregationPaginationToken|None): Fetch rows from the + given token, or from the start if None. + to_token (AggregationPaginationToken|None): Fetch rows up to the + given token, or up to the end if None. + + + Returns: + Deferred[PaginationChunk]: List of groups of annotations that + match. Each row is a dict with `type`, `key` and `count` fields. + """ + + if from_token: + from_token = AggregationPaginationToken.from_string(from_token) + + if to_token: + to_token = AggregationPaginationToken.from_string(to_token) + + where_clause = ["relates_to_id = ?", "relation_type = ?"] + where_args = [event_id, RelationTypes.ANNOTATION] + + if event_type: + where_clause.append("type = ?") + where_args.append(event_type) + + having_clause = generate_pagination_where_clause( + direction=direction, + column_names=("COUNT(*)", "MAX(stream_ordering)"), + from_token=attr.astuple(from_token) if from_token else None, + to_token=attr.astuple(to_token) if to_token else None, + engine=self.database_engine, + ) + + if direction == "b": + order = "DESC" + else: + order = "ASC" + + if having_clause: + having_clause = "HAVING " + having_clause + else: + having_clause = "" + + sql = """ + SELECT type, aggregation_key, COUNT(*), MAX(stream_ordering) + FROM event_relations + INNER JOIN events USING (event_id) + WHERE {where_clause} + GROUP BY relation_type, type, aggregation_key + {having_clause} + ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order} + LIMIT ? + """.format( + where_clause=" AND ".join(where_clause), + order=order, + having_clause=having_clause, + ) + + def _get_aggregation_groups_for_event_txn(txn): + txn.execute(sql, where_args + [limit + 1]) + + next_batch = None + events = [] + for row in txn: + events.append({"type": row[0], "key": row[1], "count": row[2]}) + next_batch = AggregationPaginationToken(row[2], row[3]) + + if len(events) <= limit: + next_batch = None + + return PaginationChunk( + chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token + ) + + return self.runInteraction( + "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn + ) + def _handle_event_relations(self, txn, event): """Handles inserting relation data during peristence of events diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index bcc1c1bb85..661dd45755 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools + import six from synapse.api.constants import EventTypes, RelationTypes @@ -30,6 +32,12 @@ class RelationsTestCase(unittest.HomeserverTestCase): login.register_servlets, ] + def make_homeserver(self, reactor, clock): + # We need to enable msc1849 support for aggregations + config = self.default_config() + config["experimental_msc1849_support_enabled"] = True + return self.setup_test_homeserver(config=config) + def prepare(self, reactor, clock, hs): self.room = self.helper.create_room_as(self.user_id) res = self.helper.send(self.room, body="Hi!") @@ -40,7 +48,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): creates the right shape of event. """ - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="👍") + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key=u"👍") self.assertEquals(200, channel.code, channel.json_body) event_id = channel.json_body["event_id"] @@ -72,7 +80,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) self.assertEquals(400, channel.code, channel.json_body) - def test_paginate(self): + def test_basic_paginate_relations(self): """Tests that calling pagination API corectly the latest relations. """ channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction") @@ -102,6 +110,243 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel.json_body["chunk"][0], ) + # Make sure next_batch has something in it that looks like it could be a + # valid token. + self.assertIsInstance( + channel.json_body.get("next_batch"), six.string_types, channel.json_body + ) + + def test_repeated_paginate_relations(self): + """Test that if we paginate using a limit and tokens then we get the + expected events. + """ + + expected_event_ids = [] + for _ in range(10): + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction") + self.assertEquals(200, channel.code, channel.json_body) + expected_event_ids.append(channel.json_body["event_id"]) + + prev_token = None + found_event_ids = [] + for _ in range(20): + from_token = "" + if prev_token: + from_token = "&from=" + prev_token + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/relations/%s?limit=1%s" + % (self.room, self.parent_id, from_token), + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + found_event_ids.extend(e["event_id"] for e in channel.json_body["chunk"]) + next_batch = channel.json_body.get("next_batch") + + self.assertNotEquals(prev_token, next_batch) + prev_token = next_batch + + if not prev_token: + break + + # We paginated backwards, so reverse + found_event_ids.reverse() + self.assertEquals(found_event_ids, expected_event_ids) + + def test_aggregation_pagination_groups(self): + """Test that we can paginate annotation groups correctly. + """ + + sent_groups = {u"👍": 10, u"a": 7, u"b": 5, u"c": 3, u"d": 2, u"e": 1} + for key in itertools.chain.from_iterable( + itertools.repeat(key, num) for key, num in sent_groups.items() + ): + channel = self._send_relation( + RelationTypes.ANNOTATION, "m.reaction", key=key + ) + self.assertEquals(200, channel.code, channel.json_body) + + prev_token = None + found_groups = {} + for _ in range(20): + from_token = "" + if prev_token: + from_token = "&from=" + prev_token + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/%s?limit=1%s" + % (self.room, self.parent_id, from_token), + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertEqual(len(channel.json_body["chunk"]), 1, channel.json_body) + + for groups in channel.json_body["chunk"]: + # We only expect reactions + self.assertEqual(groups["type"], "m.reaction", channel.json_body) + + # We should only see each key once + self.assertNotIn(groups["key"], found_groups, channel.json_body) + + found_groups[groups["key"]] = groups["count"] + + next_batch = channel.json_body.get("next_batch") + + self.assertNotEquals(prev_token, next_batch) + prev_token = next_batch + + if not prev_token: + break + + self.assertEquals(sent_groups, found_groups) + + def test_aggregation_pagination_within_group(self): + """Test that we can paginate within an annotation group. + """ + + expected_event_ids = [] + for _ in range(10): + channel = self._send_relation( + RelationTypes.ANNOTATION, "m.reaction", key=u"👍" + ) + self.assertEquals(200, channel.code, channel.json_body) + expected_event_ids.append(channel.json_body["event_id"]) + + # Also send a different type of reaction so that we test we don't see it + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") + self.assertEquals(200, channel.code, channel.json_body) + + prev_token = None + found_event_ids = [] + encoded_key = six.moves.urllib.parse.quote_plus(u"👍".encode("utf-8")) + for _ in range(20): + from_token = "" + if prev_token: + from_token = "&from=" + prev_token + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s" + "/aggregations/%s/%s/m.reaction/%s?limit=1%s" + % ( + self.room, + self.parent_id, + RelationTypes.ANNOTATION, + encoded_key, + from_token, + ), + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertEqual(len(channel.json_body["chunk"]), 1, channel.json_body) + + found_event_ids.extend(e["event_id"] for e in channel.json_body["chunk"]) + + next_batch = channel.json_body.get("next_batch") + + self.assertNotEquals(prev_token, next_batch) + prev_token = next_batch + + if not prev_token: + break + + # We paginated backwards, so reverse + found_event_ids.reverse() + self.assertEquals(found_event_ids, expected_event_ids) + + def test_aggregation(self): + """Test that annotations get correctly aggregated. + """ + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b") + self.assertEquals(200, channel.code, channel.json_body) + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/%s" + % (self.room, self.parent_id), + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertEquals( + channel.json_body, + { + "chunk": [ + {"type": "m.reaction", "key": "a", "count": 2}, + {"type": "m.reaction", "key": "b", "count": 1}, + ] + }, + ) + + def test_aggregation_must_be_annotation(self): + """Test that aggregations must be annotations. + """ + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/m.replaces/%s?limit=1" + % (self.room, self.parent_id), + ) + self.render(request) + self.assertEquals(400, channel.code, channel.json_body) + + def test_aggregation_get_event(self): + """Test that annotations and references get correctly bundled when + getting the parent event. + """ + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.REFERENCES, "m.room.test") + self.assertEquals(200, channel.code, channel.json_body) + reply_1 = channel.json_body["event_id"] + + channel = self._send_relation(RelationTypes.REFERENCES, "m.room.test") + self.assertEquals(200, channel.code, channel.json_body) + reply_2 = channel.json_body["event_id"] + + request, channel = self.make_request( + "GET", "/rooms/%s/event/%s" % (self.room, self.parent_id) + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.maxDiff = None + + self.assertEquals( + channel.json_body["unsigned"].get("m.relations"), + { + RelationTypes.ANNOTATION: { + "chunk": [ + {"type": "m.reaction", "key": "a", "count": 2}, + {"type": "m.reaction", "key": "b", "count": 1}, + ] + }, + RelationTypes.REFERENCES: { + "chunk": [{"event_id": reply_1}, {"event_id": reply_2}] + }, + }, + ) + def _send_relation(self, relation_type, event_type, key=None): """Helper function to send a relation pointing at `self.parent_id` @@ -116,7 +361,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): """ query = "" if key: - query = "?key=" + six.moves.urllib.parse.quote_plus(key) + query = "?key=" + six.moves.urllib.parse.quote_plus(key.encode("utf-8")) request, channel = self.make_request( "POST", -- cgit 1.5.1 From 33453419b0cbbe29d3f4b376e9eafab10d5d012a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 09:56:12 +0100 Subject: Add cache to relations --- synapse/storage/relations.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index db4b842c97..1fd3d4fafc 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -21,6 +21,7 @@ from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore from synapse.storage.stream import generate_pagination_where_clause +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) @@ -109,6 +110,7 @@ class AggregationPaginationToken(object): class RelationsStore(SQLBaseStore): + @cached(tree=True) def get_relations_for_event( self, event_id, @@ -216,6 +218,7 @@ class RelationsStore(SQLBaseStore): "get_recent_references_for_event", _get_recent_references_for_event_txn ) + @cached(tree=True) def get_aggregation_groups_for_event( self, event_id, @@ -353,3 +356,8 @@ class RelationsStore(SQLBaseStore): "aggregation_key": aggregation_key, }, ) + + txn.call_after(self.get_relations_for_event.invalidate_many, (parent_id,)) + txn.call_after( + self.get_aggregation_groups_for_event.invalidate_many, (parent_id,) + ) -- cgit 1.5.1 From b5c62c6b2643a138956af0b04521540c270a3e94 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 10:18:53 +0100 Subject: Fix relations in worker mode --- synapse/replication/slave/storage/events.py | 13 ++++++++++--- synapse/replication/tcp/streams/_base.py | 1 + synapse/replication/tcp/streams/events.py | 11 ++++++----- synapse/storage/events.py | 12 ++++++++---- synapse/storage/relations.py | 4 +++- 5 files changed, 28 insertions(+), 13 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index b457c5563f..797450bc66 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -23,6 +23,7 @@ from synapse.replication.tcp.streams.events import ( from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore +from synapse.storage.relations import RelationsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.signatures import SignatureWorkerStore from synapse.storage.state import StateGroupWorkerStore @@ -52,6 +53,7 @@ class SlavedEventStore(EventFederationWorkerStore, EventsWorkerStore, SignatureWorkerStore, UserErasureWorkerStore, + RelationsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): @@ -89,7 +91,7 @@ class SlavedEventStore(EventFederationWorkerStore, for row in rows: self.invalidate_caches_for_event( -token, row.event_id, row.room_id, row.type, row.state_key, - row.redacts, + row.redacts, row.relates_to, backfilled=True, ) return super(SlavedEventStore, self).process_replication_rows( @@ -102,7 +104,7 @@ class SlavedEventStore(EventFederationWorkerStore, if row.type == EventsStreamEventRow.TypeId: self.invalidate_caches_for_event( token, data.event_id, data.room_id, data.type, data.state_key, - data.redacts, + data.redacts, data.relates_to, backfilled=False, ) elif row.type == EventsStreamCurrentStateRow.TypeId: @@ -114,7 +116,8 @@ class SlavedEventStore(EventFederationWorkerStore, raise Exception("Unknown events stream row type %s" % (row.type, )) def invalidate_caches_for_event(self, stream_ordering, event_id, room_id, - etype, state_key, redacts, backfilled): + etype, state_key, redacts, relates_to, + backfilled): self._invalidate_get_event_cache(event_id) self.get_latest_event_ids_in_room.invalidate((room_id,)) @@ -136,3 +139,7 @@ class SlavedEventStore(EventFederationWorkerStore, state_key, stream_ordering ) self.get_invited_rooms_for_user.invalidate((state_key,)) + + if relates_to: + self.get_relations_for_event.invalidate_many((relates_to,)) + self.get_aggregation_groups_for_event.invalidate_many((relates_to,)) diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 8971a6a22e..b6ce7a7bee 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -32,6 +32,7 @@ BackfillStreamRow = namedtuple("BackfillStreamRow", ( "type", # str "state_key", # str, optional "redacts", # str, optional + "relates_to", # str, optional )) PresenceStreamRow = namedtuple("PresenceStreamRow", ( "user_id", # str diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index e0f6e29248..f1290d022a 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -80,11 +80,12 @@ class BaseEventsStreamRow(object): class EventsStreamEventRow(BaseEventsStreamRow): TypeId = "ev" - event_id = attr.ib() # str - room_id = attr.ib() # str - type = attr.ib() # str - state_key = attr.ib() # str, optional - redacts = attr.ib() # str, optional + event_id = attr.ib() # str + room_id = attr.ib() # str + type = attr.ib() # str + state_key = attr.ib() # str, optional + redacts = attr.ib() # str, optional + relates_to = attr.ib() # str, optional @attr.s(slots=True, frozen=True) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6802bf42ce..b025ebc926 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1657,10 +1657,11 @@ class EventsStore( def get_all_new_forward_event_rows(txn): sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? < stream_ordering AND stream_ordering <= ?" " ORDER BY stream_ordering ASC" " LIMIT ?" @@ -1675,11 +1676,12 @@ class EventsStore( sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " INNER JOIN ex_outlier_stream USING (event_id)" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? < event_stream_ordering" " AND event_stream_ordering <= ?" " ORDER BY event_stream_ordering DESC" @@ -1700,10 +1702,11 @@ class EventsStore( def get_all_new_backfill_event_rows(txn): sql = ( "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? > stream_ordering AND stream_ordering >= ?" " ORDER BY stream_ordering ASC" " LIMIT ?" @@ -1718,11 +1721,12 @@ class EventsStore( sql = ( "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " INNER JOIN ex_outlier_stream USING (event_id)" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? > event_stream_ordering" " AND event_stream_ordering >= ?" " ORDER BY event_stream_ordering DESC" diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 1fd3d4fafc..732418ec65 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -109,7 +109,7 @@ class AggregationPaginationToken(object): return "%d-%d" % (self.count, self.stream) -class RelationsStore(SQLBaseStore): +class RelationsWorkerStore(SQLBaseStore): @cached(tree=True) def get_relations_for_event( self, @@ -318,6 +318,8 @@ class RelationsStore(SQLBaseStore): "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn ) + +class RelationsStore(RelationsWorkerStore): def _handle_event_relations(self, txn, event): """Handles inserting relation data during peristence of events -- cgit 1.5.1 From 2c662ddde4e1277a0dc17295748aa5f0c41fa163 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 14:21:39 +0100 Subject: Indirect tuple conversion --- synapse/storage/relations.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 732418ec65..996cb6903a 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -81,6 +81,9 @@ class RelationPaginationToken(object): def to_string(self): return "%d-%d" % (self.topological, self.stream) + def as_tuple(self): + return attr.astuple(self) + @attr.s class AggregationPaginationToken(object): @@ -108,6 +111,9 @@ class AggregationPaginationToken(object): def to_string(self): return "%d-%d" % (self.count, self.stream) + def as_tuple(self): + return attr.astuple(self) + class RelationsWorkerStore(SQLBaseStore): @cached(tree=True) -- cgit 1.5.1 From 7a7eba8302d6566c044ca82c8ac0da65aa85e36b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 14:24:58 +0100 Subject: Move parsing of tokens out of storage layer --- synapse/rest/client/v2_alpha/relations.py | 19 +++++++++++++++++++ synapse/storage/relations.py | 16 ++-------------- 2 files changed, 21 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index 1b53e638eb..41e0a44936 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -32,6 +32,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.rest.client.transactions import HttpTransactionCache +from synapse.storage.relations import AggregationPaginationToken, RelationPaginationToken from ._base import client_v2_patterns @@ -149,6 +150,12 @@ class RelationPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") + if from_token: + from_token = RelationPaginationToken.from_string(from_token) + + if to_token: + to_token = RelationPaginationToken.from_string(to_token) + result = yield self.store.get_relations_for_event( event_id=parent_id, relation_type=relation_type, @@ -221,6 +228,12 @@ class RelationAggregationPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") + if from_token: + from_token = AggregationPaginationToken.from_string(from_token) + + if to_token: + to_token = AggregationPaginationToken.from_string(to_token) + res = yield self.store.get_aggregation_groups_for_event( event_id=parent_id, event_type=event_type, @@ -289,6 +302,12 @@ class RelationAggregationGroupPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") + if from_token: + from_token = RelationPaginationToken.from_string(from_token) + + if to_token: + to_token = RelationPaginationToken.from_string(to_token) + result = yield self.store.get_relations_for_event( event_id=parent_id, relation_type=relation_type, diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 996cb6903a..de67e305a1 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -54,7 +54,7 @@ class PaginationChunk(object): return d -@attr.s +@attr.s(frozen=True, slots=True) class RelationPaginationToken(object): """Pagination token for relation pagination API. @@ -85,7 +85,7 @@ class RelationPaginationToken(object): return attr.astuple(self) -@attr.s +@attr.s(frozen=True, slots=True) class AggregationPaginationToken(object): """Pagination token for relation aggregation pagination API. @@ -151,12 +151,6 @@ class RelationsWorkerStore(SQLBaseStore): requested. The rows are of the form `{"event_id": "..."}`. """ - if from_token: - from_token = RelationPaginationToken.from_string(from_token) - - if to_token: - to_token = RelationPaginationToken.from_string(to_token) - where_clause = ["relates_to_id = ?"] where_args = [event_id] @@ -258,12 +252,6 @@ class RelationsWorkerStore(SQLBaseStore): match. Each row is a dict with `type`, `key` and `count` fields. """ - if from_token: - from_token = AggregationPaginationToken.from_string(from_token) - - if to_token: - to_token = AggregationPaginationToken.from_string(to_token) - where_clause = ["relates_to_id = ?", "relation_type = ?"] where_args = [event_id, RelationTypes.ANNOTATION] -- cgit 1.5.1 From 895179a4dcc993af9702e3ab36e5d2157fdd0c26 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 16:41:05 +0100 Subject: Update docstring --- synapse/storage/stream.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 163363c0c2..824e77c89e 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -77,6 +77,15 @@ def generate_pagination_where_clause( would be generated for dir=b, from_token=(6, 7) and to_token=(5, 3). + Note that tokens are considered to be after the row they are in, e.g. if + a row A has a token T, then we consider A to be before T. This covention + is important when figuring out inequalities for the generated SQL, and + produces the following result: + - If paginatiting forwards then we exclude any rows matching the from + token, but include those that match the to token. + - If paginatiting backwards then we include any rows matching the from + token, but include those that match the to token. + Args: direction (str): Whether we're paginating backwards("b") or forwards ("f"). @@ -131,7 +140,9 @@ def _make_generic_sql_bound(bound, column_names, values, engine): names (tuple[str, str]): The column names. Must *not* be user defined as these get inserted directly into the SQL statement without escapes. - values (tuple[int, int]): The values to bound the columns by. + values (tuple[int|None, int]): The values to bound the columns by. If + the first value is None then only creates a bound on the second + column. engine: The database engine to generate the SQL for Returns: -- cgit 1.5.1 From d46aab3fa8bc02d8db9616490e849b9443fed8e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 May 2019 16:59:21 +0100 Subject: Add basic editing support --- synapse/events/utils.py | 30 +++++++-- synapse/replication/slave/storage/events.py | 1 + synapse/storage/relations.py | 60 +++++++++++++++++- tests/rest/client/v2_alpha/test_relations.py | 91 +++++++++++++++++++++++++--- 4 files changed, 167 insertions(+), 15 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 16d0c64372..2019ce9b1c 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -346,7 +346,7 @@ class EventClientSerializer(object): defer.returnValue(event) event_id = event.event_id - event = serialize_event(event, time_now, **kwargs) + serialized_event = serialize_event(event, time_now, **kwargs) # If MSC1849 is enabled then we need to look if thre are any relations # we need to bundle in with the event @@ -359,14 +359,36 @@ class EventClientSerializer(object): ) if annotations.chunk: - r = event["unsigned"].setdefault("m.relations", {}) + r = serialized_event["unsigned"].setdefault("m.relations", {}) r[RelationTypes.ANNOTATION] = annotations.to_dict() if references.chunk: - r = event["unsigned"].setdefault("m.relations", {}) + r = serialized_event["unsigned"].setdefault("m.relations", {}) r[RelationTypes.REFERENCES] = references.to_dict() - defer.returnValue(event) + edit = None + if event.type == EventTypes.Message: + edit = yield self.store.get_applicable_edit( + event.event_id, event.type, event.sender, + ) + + if edit: + # If there is an edit replace the content, preserving existing + # relations. + + relations = event.content.get("m.relates_to") + serialized_event["content"] = edit.content.get("m.new_content", {}) + if relations: + serialized_event["content"]["m.relates_to"] = relations + else: + serialized_event["content"].pop("m.relates_to", None) + + r = serialized_event["unsigned"].setdefault("m.relations", {}) + r[RelationTypes.REPLACES] = { + "event_id": edit.event_id, + } + + defer.returnValue(serialized_event) def serialize_events(self, events, time_now, **kwargs): """Serializes multiple events. diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 797450bc66..857128b311 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -143,3 +143,4 @@ class SlavedEventStore(EventFederationWorkerStore, if relates_to: self.get_relations_for_event.invalidate_many((relates_to,)) self.get_aggregation_groups_for_event.invalidate_many((relates_to,)) + self.get_applicable_edit.invalidate_many((relates_to,)) diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index de67e305a1..aa91e52733 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -17,11 +17,13 @@ import logging import attr -from synapse.api.constants import RelationTypes +from twisted.internet import defer + +from synapse.api.constants import EventTypes, RelationTypes from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore from synapse.storage.stream import generate_pagination_where_clause -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks logger = logging.getLogger(__name__) @@ -312,6 +314,59 @@ class RelationsWorkerStore(SQLBaseStore): "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn ) + @cachedInlineCallbacks(tree=True) + def get_applicable_edit(self, event_id, event_type, sender): + """Get the most recent edit (if any) that has happened for the given + event. + + Correctly handles checking whether edits were allowed to happen. + + Args: + event_id (str): The original event ID + event_type (str): The original event type + sender (str): The original event sender + + Returns: + Deferred[EventBase|None]: Returns the most recent edit, if any. + """ + + # We only allow edits for `m.room.message` events that have the same sender + # and event type. We can't assert these things during regular event auth so + # we have to do the post hoc. + + if event_type != EventTypes.Message: + return + + sql = """ + SELECT event_id, origin_server_ts FROM events + INNER JOIN event_relations USING (event_id) + WHERE + relates_to_id = ? + AND relation_type = ? + AND type = ? + AND sender = ? + ORDER by origin_server_ts DESC, event_id DESC + LIMIT 1 + """ + + def _get_applicable_edit_txn(txn): + txn.execute( + sql, (event_id, RelationTypes.REPLACES, event_type, sender) + ) + row = txn.fetchone() + if row: + return row[0] + + edit_id = yield self.runInteraction( + "get_applicable_edit", _get_applicable_edit_txn + ) + + if not edit_id: + return + + edit_event = yield self.get_event(edit_id, allow_none=True) + defer.returnValue(edit_event) + class RelationsStore(RelationsWorkerStore): def _handle_event_relations(self, txn, event): @@ -357,3 +412,4 @@ class RelationsStore(RelationsWorkerStore): txn.call_after( self.get_aggregation_groups_for_event.invalidate_many, (parent_id,) ) + txn.call_after(self.get_applicable_edit.invalidate_many, (parent_id,)) diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index 775622bd2b..b0e4c47ae3 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -14,6 +14,7 @@ # limitations under the License. import itertools +import json import six @@ -102,11 +103,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): # relation event we sent above. self.assertEquals(len(channel.json_body["chunk"]), 1, channel.json_body) self.assert_dict( - { - "event_id": annotation_id, - "sender": self.user_id, - "type": "m.reaction", - }, + {"event_id": annotation_id, "sender": self.user_id, "type": "m.reaction"}, channel.json_body["chunk"][0], ) @@ -330,8 +327,6 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEquals(200, channel.code, channel.json_body) - self.maxDiff = None - self.assertEquals( channel.json_body["unsigned"].get("m.relations"), { @@ -347,7 +342,84 @@ class RelationsTestCase(unittest.HomeserverTestCase): }, ) - def _send_relation(self, relation_type, event_type, key=None): + def test_edit(self): + """Test that a simple edit works. + """ + + new_body = {"msgtype": "m.text", "body": "I've been edited!"} + channel = self._send_relation( + RelationTypes.REPLACES, + "m.room.message", + content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body}, + ) + self.assertEquals(200, channel.code, channel.json_body) + + edit_event_id = channel.json_body["event_id"] + + request, channel = self.make_request( + "GET", "/rooms/%s/event/%s" % (self.room, self.parent_id) + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertEquals(channel.json_body["content"], new_body) + + self.assertEquals( + channel.json_body["unsigned"].get("m.relations"), + {RelationTypes.REPLACES: {"event_id": edit_event_id}}, + ) + + def test_multi_edit(self): + """Test that multiple edits, including attempts by people who + shouldn't be allowed, are correctly handled. + """ + + channel = self._send_relation( + RelationTypes.REPLACES, + "m.room.message", + content={ + "msgtype": "m.text", + "body": "Wibble", + "m.new_content": {"msgtype": "m.text", "body": "First edit"}, + }, + ) + self.assertEquals(200, channel.code, channel.json_body) + + new_body = {"msgtype": "m.text", "body": "I've been edited!"} + channel = self._send_relation( + RelationTypes.REPLACES, + "m.room.message", + content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body}, + ) + self.assertEquals(200, channel.code, channel.json_body) + + edit_event_id = channel.json_body["event_id"] + + channel = self._send_relation( + RelationTypes.REPLACES, + "m.room.message.WRONG_TYPE", + content={ + "msgtype": "m.text", + "body": "Wibble", + "m.new_content": {"msgtype": "m.text", "body": "Edit, but wrong type"}, + }, + ) + self.assertEquals(200, channel.code, channel.json_body) + + request, channel = self.make_request( + "GET", "/rooms/%s/event/%s" % (self.room, self.parent_id) + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertEquals(channel.json_body["content"], new_body) + + self.assertEquals( + channel.json_body["unsigned"].get("m.relations"), + {RelationTypes.REPLACES: {"event_id": edit_event_id}}, + ) + + def _send_relation(self, relation_type, event_type, key=None, content={}): """Helper function to send a relation pointing at `self.parent_id` Args: @@ -355,6 +427,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): event_type (str): The type of the event to create key (str|None): The aggregation key used for m.annotation relation type. + content(dict|None): The content of the created event. Returns: FakeChannel @@ -367,7 +440,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): "POST", "/_matrix/client/unstable/rooms/%s/send_relation/%s/%s/%s%s" % (self.room, self.parent_id, relation_type, event_type, query), - b"{}", + json.dumps(content).encode("utf-8"), ) self.render(request) return channel -- cgit 1.5.1 From 5dbff345094327e61fa9c1425ba0f955c9530ba7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 May 2019 15:48:04 +0100 Subject: Fixup bsaed on review comments --- synapse/events/utils.py | 4 +--- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/relations.py | 32 +++++++++++++++-------------- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 2019ce9b1c..bf3c8f8dc1 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -368,9 +368,7 @@ class EventClientSerializer(object): edit = None if event.type == EventTypes.Message: - edit = yield self.store.get_applicable_edit( - event.event_id, event.type, event.sender, - ) + edit = yield self.store.get_applicable_edit(event_id) if edit: # If there is an edit replace the content, preserving existing diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 857128b311..a3952506c1 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -143,4 +143,4 @@ class SlavedEventStore(EventFederationWorkerStore, if relates_to: self.get_relations_for_event.invalidate_many((relates_to,)) self.get_aggregation_groups_for_event.invalidate_many((relates_to,)) - self.get_applicable_edit.invalidate_many((relates_to,)) + self.get_applicable_edit.invalidate((relates_to,)) diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index aa91e52733..6e216066ab 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -19,7 +19,7 @@ import attr from twisted.internet import defer -from synapse.api.constants import EventTypes, RelationTypes +from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore from synapse.storage.stream import generate_pagination_where_clause @@ -314,8 +314,8 @@ class RelationsWorkerStore(SQLBaseStore): "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn ) - @cachedInlineCallbacks(tree=True) - def get_applicable_edit(self, event_id, event_type, sender): + @cachedInlineCallbacks() + def get_applicable_edit(self, event_id): """Get the most recent edit (if any) that has happened for the given event. @@ -323,8 +323,6 @@ class RelationsWorkerStore(SQLBaseStore): Args: event_id (str): The original event ID - event_type (str): The original event type - sender (str): The original event sender Returns: Deferred[EventBase|None]: Returns the most recent edit, if any. @@ -332,26 +330,28 @@ class RelationsWorkerStore(SQLBaseStore): # We only allow edits for `m.room.message` events that have the same sender # and event type. We can't assert these things during regular event auth so - # we have to do the post hoc. - - if event_type != EventTypes.Message: - return + # we have to do the checks post hoc. + # Fetches latest edit that has the same type and sender as the + # original, and is an `m.room.message`. sql = """ - SELECT event_id, origin_server_ts FROM events + SELECT edit.event_id FROM events AS edit INNER JOIN event_relations USING (event_id) + INNER JOIN events AS original ON + original.event_id = relates_to_id + AND edit.type = original.type + AND edit.sender = original.sender WHERE relates_to_id = ? AND relation_type = ? - AND type = ? - AND sender = ? - ORDER by origin_server_ts DESC, event_id DESC + AND edit.type = 'm.room.message' + ORDER by edit.origin_server_ts DESC, edit.event_id DESC LIMIT 1 """ def _get_applicable_edit_txn(txn): txn.execute( - sql, (event_id, RelationTypes.REPLACES, event_type, sender) + sql, (event_id, RelationTypes.REPLACES,) ) row = txn.fetchone() if row: @@ -412,4 +412,6 @@ class RelationsStore(RelationsWorkerStore): txn.call_after( self.get_aggregation_groups_for_event.invalidate_many, (parent_id,) ) - txn.call_after(self.get_applicable_edit.invalidate_many, (parent_id,)) + + if rel_type == RelationTypes.REPLACES: + txn.call_after(self.get_applicable_edit.invalidate, (parent_id,)) -- cgit 1.5.1 From 8dd9cca8ead9fc0cf0789edf9fcfce04814c5eda Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 May 2019 16:40:51 +0100 Subject: Spelling and clarifications --- synapse/storage/stream.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 824e77c89e..529ad4ea79 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -78,12 +78,12 @@ def generate_pagination_where_clause( would be generated for dir=b, from_token=(6, 7) and to_token=(5, 3). Note that tokens are considered to be after the row they are in, e.g. if - a row A has a token T, then we consider A to be before T. This covention + a row A has a token T, then we consider A to be before T. This convention is important when figuring out inequalities for the generated SQL, and produces the following result: - - If paginatiting forwards then we exclude any rows matching the from + - If paginating forwards then we exclude any rows matching the from token, but include those that match the to token. - - If paginatiting backwards then we include any rows matching the from + - If paginating backwards then we include any rows matching the from token, but include those that match the to token. Args: @@ -92,8 +92,12 @@ def generate_pagination_where_clause( column_names (tuple[str, str]): The column names to bound. Must *not* be user defined as these get inserted directly into the SQL statement without escapes. - from_token (tuple[int, int]|None) - to_token (tuple[int, int]|None) + from_token (tuple[int, int]|None): The start point for the pagination. + This is an exclusive minimum bound if direction is "f", and an + inclusive maximum bound if direction is "b". + to_token (tuple[int, int]|None): The endpoint point for the pagination. + This is an inclusive maximum bound if direction is "f", and an + exclusive minimum bound if direction is "b". engine: The database engine to generate the clauses for Returns: -- cgit 1.5.1 From b63cc325a9362874a13f0079589afccd7d108f1f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 May 2019 18:01:39 +0100 Subject: Only count aggregations from distinct senders As a user isn't allowed to send a single emoji more than once. --- synapse/storage/relations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 6e216066ab..42f587a7d8 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -280,7 +280,7 @@ class RelationsWorkerStore(SQLBaseStore): having_clause = "" sql = """ - SELECT type, aggregation_key, COUNT(*), MAX(stream_ordering) + SELECT type, aggregation_key, COUNT(DISTINCT sender), MAX(stream_ordering) FROM event_relations INNER JOIN events USING (event_id) WHERE {where_clause} -- cgit 1.5.1 From ad5b4074e1a84b1e50a97144fbf1902ddc89db73 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 17 May 2019 19:37:31 +0100 Subject: Add startup background job for account validity If account validity is enabled in the server's configuration, this job will run at startup as a background job and will stick an expiration date to any registered account missing one. --- synapse/storage/_base.py | 62 +++++++++++++++++++++++++++++ synapse/storage/registration.py | 16 ++------ tests/rest/client/v2_alpha/test_register.py | 55 +++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 983ce026e1..06d516c9e2 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2017-2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -227,6 +229,8 @@ class SQLBaseStore(object): # A set of tables that are not safe to use native upserts in. self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) + self._account_validity = self.hs.config.account_validity + # We add the user_directory_search table to the blacklist on SQLite # because the existing search table does not have an index, making it # unsafe to use native upserts. @@ -243,6 +247,14 @@ class SQLBaseStore(object): self._check_safe_to_upsert, ) + if self._account_validity.enabled: + self._clock.call_later( + 0.0, + run_as_background_process, + "account_validity_set_expiration_dates", + self._set_expiration_date_when_missing, + ) + @defer.inlineCallbacks def _check_safe_to_upsert(self): """ @@ -275,6 +287,56 @@ class SQLBaseStore(object): self._check_safe_to_upsert, ) + @defer.inlineCallbacks + def _set_expiration_date_when_missing(self): + """ + Retrieves the list of registered users that don't have an expiration date, and + adds an expiration date for each of them. + """ + + def select_users_with_no_expiration_date_txn(txn): + """Retrieves the list of registered users with no expiration date from the + database. + """ + sql = ( + "SELECT users.name FROM users" + " LEFT JOIN account_validity ON (users.name = account_validity.user_id)" + " WHERE account_validity.user_id is NULL;" + ) + txn.execute(sql, []) + return self.cursor_to_dict(txn) + + res = yield self.runInteraction( + "get_users_with_no_expiration_date", + select_users_with_no_expiration_date_txn, + ) + + if res: + for user in res: + self.runInteraction( + "set_expiration_date_for_user_background", + self.set_expiration_date_for_user_txn, + user["name"], + ) + + def set_expiration_date_for_user_txn(self, txn, user_id): + """Sets an expiration date to the account with the given user ID. + + Args: + user_id (str): User ID to set an expiration date for. + """ + now_ms = self._clock.time_msec() + expiration_ts = now_ms + self._account_validity.period + self._simple_insert_txn( + txn, + "account_validity", + values={ + "user_id": user_id, + "expiration_ts_ms": expiration_ts, + "email_sent": False, + }, + ) + def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 03a06a83d6..4cf159ba81 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- -# Copyright 2014 - 2016 OpenMarket Ltd +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2017-2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -725,17 +727,7 @@ class RegistrationStore( raise StoreError(400, "User ID already taken.", errcode=Codes.USER_IN_USE) if self._account_validity.enabled: - now_ms = self.clock.time_msec() - expiration_ts = now_ms + self._account_validity.period - self._simple_insert_txn( - txn, - "account_validity", - values={ - "user_id": user_id, - "expiration_ts_ms": expiration_ts, - "email_sent": False, - } - ) + self.set_expiration_date_for_user_txn(txn, user_id) if token: # it's possible for this to get a conflict, but only for a single user diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 65685883db..d4a1d4d50c 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -1,3 +1,20 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2017-2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import datetime import json import os @@ -409,3 +426,41 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) self.assertEqual(len(self.email_attempts), 1) + + +class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + ] + + def make_homeserver(self, reactor, clock): + self.validity_period = 10 + + config = self.default_config() + + config["enable_registration"] = True + config["account_validity"] = { + "enabled": False, + } + + self.hs = self.setup_test_homeserver(config=config) + self.hs.config.account_validity.period = self.validity_period + + self.store = self.hs.get_datastore() + + return self.hs + + def test_background_job(self): + """ + Tests whether the account validity startup background job does the right thing, + which is sticking an expiration date to every account that doesn't already have + one. + """ + user_id = self.register_user("kermit", "user") + + now_ms = self.hs.clock.time_msec() + self.get_success(self.store._set_expiration_date_when_missing()) + + res = self.get_success(self.store.get_expiration_ts_for_user(user_id)) + self.assertEqual(res, now_ms + self.validity_period) -- cgit 1.5.1 From 935af0da380f39ba284b78054270331bdbad7712 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 May 2019 10:13:05 +0100 Subject: Correctly update aggregation counts after redaction --- synapse/storage/events.py | 3 +++ synapse/storage/relations.py | 17 +++++++++++++ tests/rest/client/v2_alpha/test_relations.py | 37 ++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b025ebc926..881d6d0126 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1325,6 +1325,9 @@ class EventsStore( txn, event.room_id, event.redacts ) + # Remove from relations table. + self._handle_redaction(txn, event.redacts) + # Update the event_forward_extremities, event_backward_extremities and # event_edges tables. self._handle_mult_prev_events( diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 6e216066ab..63e6185ee3 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -415,3 +415,20 @@ class RelationsStore(RelationsWorkerStore): if rel_type == RelationTypes.REPLACES: txn.call_after(self.get_applicable_edit.invalidate, (parent_id,)) + + def _handle_redaction(self, txn, redacted_event_id): + """Handles receiving a redaction and checking whether we need to remove + any redacted relations from the database. + + Args: + txn + redacted_event_id (str): The event that was redacted. + """ + + self._simple_delete_txn( + txn, + table="event_relations", + keyvalues={ + "event_id": redacted_event_id, + } + ) diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index cd965167f8..3737cdc396 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -320,6 +320,43 @@ class RelationsTestCase(unittest.HomeserverTestCase): }, ) + def test_aggregation_redactions(self): + """Test that annotations get correctly aggregated after a redactions. + """ + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(200, channel.code, channel.json_body) + to_redact_event_id = channel.json_body["event_id"] + + channel = self._send_relation( + RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token + ) + self.assertEquals(200, channel.code, channel.json_body) + + # Now lets redact the 'a' reaction + request, channel = self.make_request( + "POST", + "/_matrix/client/r0/rooms/%s/redact/%s" % (self.room, to_redact_event_id), + access_token=self.user_token, + content={}, + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/%s" + % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertEquals( + channel.json_body, + {"chunk": [{"type": "m.reaction", "key": "a", "count": 1}]}, + ) + def test_aggregation_must_be_annotation(self): """Test that aggregations must be annotations. """ -- cgit 1.5.1 From 1dff859d6aed58561a2b5913e5c9b897bbd3599c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 May 2019 14:31:19 +0100 Subject: Rename relation types to match MSC --- synapse/api/constants.py | 4 ++-- synapse/events/utils.py | 6 +++--- synapse/storage/relations.py | 8 ++++---- tests/rest/client/v2_alpha/test_relations.py | 22 +++++++++++----------- 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 28862a1d25..6b347b1749 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -125,5 +125,5 @@ class RelationTypes(object): """The types of relations known to this server. """ ANNOTATION = "m.annotation" - REPLACES = "m.replaces" - REFERENCES = "m.references" + REPLACE = "m.replace" + REFERENCE = "m.reference" diff --git a/synapse/events/utils.py b/synapse/events/utils.py index bf3c8f8dc1..27a2a9ef98 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -355,7 +355,7 @@ class EventClientSerializer(object): event_id, ) references = yield self.store.get_relations_for_event( - event_id, RelationTypes.REFERENCES, direction="f", + event_id, RelationTypes.REFERENCE, direction="f", ) if annotations.chunk: @@ -364,7 +364,7 @@ class EventClientSerializer(object): if references.chunk: r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.REFERENCES] = references.to_dict() + r[RelationTypes.REFERENCE] = references.to_dict() edit = None if event.type == EventTypes.Message: @@ -382,7 +382,7 @@ class EventClientSerializer(object): serialized_event["content"].pop("m.relates_to", None) r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.REPLACES] = { + r[RelationTypes.REPLACE] = { "event_id": edit.event_id, } diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 63e6185ee3..493abe405e 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -351,7 +351,7 @@ class RelationsWorkerStore(SQLBaseStore): def _get_applicable_edit_txn(txn): txn.execute( - sql, (event_id, RelationTypes.REPLACES,) + sql, (event_id, RelationTypes.REPLACE,) ) row = txn.fetchone() if row: @@ -384,8 +384,8 @@ class RelationsStore(RelationsWorkerStore): rel_type = relation.get("rel_type") if rel_type not in ( RelationTypes.ANNOTATION, - RelationTypes.REFERENCES, - RelationTypes.REPLACES, + RelationTypes.REFERENCE, + RelationTypes.REPLACE, ): # Unknown relation type return @@ -413,7 +413,7 @@ class RelationsStore(RelationsWorkerStore): self.get_aggregation_groups_for_event.invalidate_many, (parent_id,) ) - if rel_type == RelationTypes.REPLACES: + if rel_type == RelationTypes.REPLACE: txn.call_after(self.get_applicable_edit.invalidate, (parent_id,)) def _handle_redaction(self, txn, redacted_event_id): diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index b3dc4bd5bc..3d040cf118 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -363,8 +363,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): request, channel = self.make_request( "GET", - "/_matrix/client/unstable/rooms/%s/aggregations/%s/m.replace?limit=1" - % (self.room, self.parent_id), + "/_matrix/client/unstable/rooms/%s/aggregations/%s/%s?limit=1" + % (self.room, self.parent_id, RelationTypes.REPLACE), access_token=self.user_token, ) self.render(request) @@ -386,11 +386,11 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b") self.assertEquals(200, channel.code, channel.json_body) - channel = self._send_relation(RelationTypes.REFERENCES, "m.room.test") + channel = self._send_relation(RelationTypes.REFERENCE, "m.room.test") self.assertEquals(200, channel.code, channel.json_body) reply_1 = channel.json_body["event_id"] - channel = self._send_relation(RelationTypes.REFERENCES, "m.room.test") + channel = self._send_relation(RelationTypes.REFERENCE, "m.room.test") self.assertEquals(200, channel.code, channel.json_body) reply_2 = channel.json_body["event_id"] @@ -411,7 +411,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): {"type": "m.reaction", "key": "b", "count": 1}, ] }, - RelationTypes.REFERENCES: { + RelationTypes.REFERENCE: { "chunk": [{"event_id": reply_1}, {"event_id": reply_2}] }, }, @@ -423,7 +423,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): new_body = {"msgtype": "m.text", "body": "I've been edited!"} channel = self._send_relation( - RelationTypes.REPLACES, + RelationTypes.REPLACE, "m.room.message", content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body}, ) @@ -443,7 +443,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertEquals( channel.json_body["unsigned"].get("m.relations"), - {RelationTypes.REPLACES: {"event_id": edit_event_id}}, + {RelationTypes.REPLACE: {"event_id": edit_event_id}}, ) def test_multi_edit(self): @@ -452,7 +452,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): """ channel = self._send_relation( - RelationTypes.REPLACES, + RelationTypes.REPLACE, "m.room.message", content={ "msgtype": "m.text", @@ -464,7 +464,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): new_body = {"msgtype": "m.text", "body": "I've been edited!"} channel = self._send_relation( - RelationTypes.REPLACES, + RelationTypes.REPLACE, "m.room.message", content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body}, ) @@ -473,7 +473,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): edit_event_id = channel.json_body["event_id"] channel = self._send_relation( - RelationTypes.REPLACES, + RelationTypes.REPLACE, "m.room.message.WRONG_TYPE", content={ "msgtype": "m.text", @@ -495,7 +495,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertEquals( channel.json_body["unsigned"].get("m.relations"), - {RelationTypes.REPLACES: {"event_id": edit_event_id}}, + {RelationTypes.REPLACE: {"event_id": edit_event_id}}, ) def _send_relation( -- cgit 1.5.1 From c7ec06e8a6909343f5860a5eecbe3aa69f03c151 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 May 2019 17:39:05 +0100 Subject: Block attempts to annotate the same event twice --- synapse/handlers/message.py | 16 +++++++++- synapse/storage/relations.py | 48 ++++++++++++++++++++++++++-- tests/rest/client/v2_alpha/test_relations.py | 27 +++++++++++++++- 3 files changed, 86 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7b2c33a922..0c892c8dba 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -22,7 +22,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer from twisted.internet.defer import succeed -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, RelationTypes from synapse.api.errors import ( AuthError, Codes, @@ -601,6 +601,20 @@ class EventCreationHandler(object): self.validator.validate_new(event) + # We now check that if this event is an annotation that the can't + # annotate the same way twice (e.g. stops users from liking an event + # multiple times). + relation = event.content.get("m.relates_to", {}) + if relation.get("rel_type") == RelationTypes.ANNOTATION: + relates_to = relation["event_id"] + aggregation_key = relation["key"] + + already_exists = yield self.store.has_user_annotated_event( + relates_to, event.type, aggregation_key, event.sender, + ) + if already_exists: + raise SynapseError(400, "Can't send same reaction twice") + logger.debug( "Created event %s", event.event_id, diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 493abe405e..7d51b38d77 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -350,9 +350,7 @@ class RelationsWorkerStore(SQLBaseStore): """ def _get_applicable_edit_txn(txn): - txn.execute( - sql, (event_id, RelationTypes.REPLACE,) - ) + txn.execute(sql, (event_id, RelationTypes.REPLACE)) row = txn.fetchone() if row: return row[0] @@ -367,6 +365,50 @@ class RelationsWorkerStore(SQLBaseStore): edit_event = yield self.get_event(edit_id, allow_none=True) defer.returnValue(edit_event) + def has_user_annotated_event(self, parent_id, event_type, aggregation_key, sender): + """Check if a user has already annotated an event with the same key + (e.g. already liked an event). + + Args: + parent_id (str): The event being annotated + event_type (str): The event type of the annotation + aggregation_key (str): The aggregation key of the annotation + sender (str): The sender of the annotation + + Returns: + Deferred[bool] + """ + + sql = """ + SELECT 1 FROM event_relations + INNER JOIN events USING (event_id) + WHERE + relates_to_id = ? + AND relation_type = ? + AND type = ? + AND sender = ? + AND aggregation_key = ? + LIMIT 1; + """ + + def _get_if_user_has_annotated_event(txn): + txn.execute( + sql, + ( + parent_id, + RelationTypes.ANNOTATION, + event_type, + sender, + aggregation_key, + ), + ) + + return bool(txn.fetchone()) + + return self.runInteraction( + "get_if_user_has_annotated_event", _get_if_user_has_annotated_event + ) + class RelationsStore(RelationsWorkerStore): def _handle_event_relations(self, txn, event): diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index 3d040cf118..43b3049daa 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -90,6 +90,15 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) self.assertEquals(400, channel.code, channel.json_body) + def test_deny_double_react(self): + """Test that we deny relations on membership events + """ + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(200, channel.code, channel.json_body) + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + self.assertEquals(400, channel.code, channel.json_body) + def test_basic_paginate_relations(self): """Tests that calling pagination API corectly the latest relations. """ @@ -234,14 +243,30 @@ class RelationsTestCase(unittest.HomeserverTestCase): """Test that we can paginate within an annotation group. """ + # We need to create ten separate users to send each reaction. + access_tokens = [self.user_token, self.user2_token] + idx = 0 + while len(access_tokens) < 10: + user_id, token = self._create_user("test" + str(idx)) + idx += 1 + + self.helper.join(self.room, user=user_id, tok=token) + access_tokens.append(token) + + idx = 0 expected_event_ids = [] for _ in range(10): channel = self._send_relation( - RelationTypes.ANNOTATION, "m.reaction", key=u"👍" + RelationTypes.ANNOTATION, + "m.reaction", + key=u"👍", + access_token=access_tokens[idx], ) self.assertEquals(200, channel.code, channel.json_body) expected_event_ids.append(channel.json_body["event_id"]) + idx += 1 + # Also send a different type of reaction so that we test we don't see it channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") self.assertEquals(200, channel.code, channel.json_body) -- cgit 1.5.1 From 5ceee46c6bd55376ff2188b6e674035d7da95f24 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 21 May 2019 13:38:51 +0100 Subject: Do the select and insert in a single transaction --- synapse/storage/_base.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 06d516c9e2..fa6839ceca 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -304,21 +304,17 @@ class SQLBaseStore(object): " WHERE account_validity.user_id is NULL;" ) txn.execute(sql, []) - return self.cursor_to_dict(txn) - res = yield self.runInteraction( + res = self.cursor_to_dict(txn) + if res: + for user in res: + self.set_expiration_date_for_user_txn(txn, user["name"]) + + yield self.runInteraction( "get_users_with_no_expiration_date", select_users_with_no_expiration_date_txn, ) - if res: - for user in res: - self.runInteraction( - "set_expiration_date_for_user_background", - self.set_expiration_date_for_user_txn, - user["name"], - ) - def set_expiration_date_for_user_txn(self, txn, user_id): """Sets an expiration date to the account with the given user ID. -- cgit 1.5.1 From 7b0e804a4a684a210abf5107e720582f68f464e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 21 May 2019 15:21:38 +0100 Subject: Fix get_max_topological_token to never return None --- synapse/storage/stream.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 529ad4ea79..0b5f5f9663 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -592,8 +592,18 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) def get_max_topological_token(self, room_id, stream_key): + """Get the max topological token in a room that before given stream + ordering. + + Args: + room_id (str) + stream_key (int) + + Returns: + Deferred[int] + """ sql = ( - "SELECT max(topological_ordering) FROM events" + "SELECT coalesce(max(topological_ordering), 0) FROM events" " WHERE room_id = ? AND stream_ordering < ?" ) return self._execute( -- cgit 1.5.1 From c4aef549ad1f55661675a789f89fe9e041fac874 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 21 May 2019 16:10:54 +0100 Subject: Exclude soft-failed events from fwd-extremity candidates. (#5146) When considering the candidates to be forward-extremities, we must exclude soft failures. Hopefully fixes #5090. --- changelog.d/5146.bugfix | 1 + synapse/handlers/federation.py | 7 ++++++- synapse/storage/events.py | 9 +++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5146.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5146.bugfix b/changelog.d/5146.bugfix new file mode 100644 index 0000000000..a54abed92b --- /dev/null +++ b/changelog.d/5146.bugfix @@ -0,0 +1 @@ +Exclude soft-failed events from forward-extremity candidates: fixes "No forward extremities left!" error. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0684778882..2202ed699a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1916,6 +1916,11 @@ class FederationHandler(BaseHandler): event.room_id, latest_event_ids=extrem_ids, ) + logger.debug( + "Doing soft-fail check for %s: state %s", + event.event_id, current_state_ids, + ) + # Now check if event pass auth against said current state auth_types = auth_types_for_event(event) current_state_ids = [ @@ -1932,7 +1937,7 @@ class FederationHandler(BaseHandler): self.auth.check(room_version, event, auth_events=current_auth_events) except AuthError as e: logger.warn( - "Failed current state auth resolution for %r because %s", + "Soft-failing %r because %s", event, e, ) event.internal_metadata.soft_failed = True diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 881d6d0126..2ffc27ff41 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -575,10 +575,11 @@ class EventsStore( def _get_events(txn, batch): sql = """ - SELECT prev_event_id + SELECT prev_event_id, internal_metadata FROM event_edges INNER JOIN events USING (event_id) LEFT JOIN rejections USING (event_id) + LEFT JOIN event_json USING (event_id) WHERE prev_event_id IN (%s) AND NOT events.outlier @@ -588,7 +589,11 @@ class EventsStore( ) txn.execute(sql, batch) - results.extend(r[0] for r in txn) + results.extend( + r[0] + for r in txn + if not json.loads(r[1]).get("soft_failed") + ) for chunk in batch_iter(event_ids, 100): yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk) -- cgit 1.5.1 From 4a30e4acb4ef14431914bd42ad09a51bd81d6c3e Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 21 May 2019 11:36:50 -0500 Subject: Room Statistics (#4338) --- changelog.d/4338.feature | 1 + docs/sample_config.yaml | 16 ++ synapse/api/constants.py | 1 + synapse/config/homeserver.py | 42 ++- synapse/config/stats.py | 60 ++++ synapse/handlers/stats.py | 325 +++++++++++++++++++++ synapse/server.py | 6 + synapse/storage/__init__.py | 2 + synapse/storage/events_worker.py | 24 ++ synapse/storage/roommember.py | 32 +++ synapse/storage/schema/delta/54/stats.sql | 80 ++++++ synapse/storage/state_deltas.py | 12 +- synapse/storage/stats.py | 450 ++++++++++++++++++++++++++++++ tests/handlers/test_stats.py | 251 +++++++++++++++++ tests/rest/client/v1/utils.py | 17 ++ 15 files changed, 1306 insertions(+), 13 deletions(-) create mode 100644 changelog.d/4338.feature create mode 100644 synapse/config/stats.py create mode 100644 synapse/handlers/stats.py create mode 100644 synapse/storage/schema/delta/54/stats.sql create mode 100644 synapse/storage/stats.py create mode 100644 tests/handlers/test_stats.py (limited to 'synapse/storage') diff --git a/changelog.d/4338.feature b/changelog.d/4338.feature new file mode 100644 index 0000000000..01285e965c --- /dev/null +++ b/changelog.d/4338.feature @@ -0,0 +1 @@ +Synapse now more efficiently collates room statistics. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f658ec8ecd..559fbcdd01 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1153,6 +1153,22 @@ password_config: # + +# Local statistics collection. Used in populating the room directory. +# +# 'bucket_size' controls how large each statistics timeslice is. It can +# be defined in a human readable short form -- e.g. "1d", "1y". +# +# 'retention' controls how long historical statistics will be kept for. +# It can be defined in a human readable short form -- e.g. "1d", "1y". +# +# +#stats: +# enabled: true +# bucket_size: 1d +# retention: 1y + + # Server Notices room configuration # # Uncomment this section to enable a room which can be used to send notices diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 6b347b1749..ee129c8689 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -79,6 +79,7 @@ class EventTypes(object): RoomHistoryVisibility = "m.room.history_visibility" CanonicalAlias = "m.room.canonical_alias" + Encryption = "m.room.encryption" RoomAvatar = "m.room.avatar" RoomEncryption = "m.room.encryption" GuestAccess = "m.room.guest_access" diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 727fdc54d8..5c4fc8ff21 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from .api import ApiConfig from .appservice import AppServiceConfig from .captcha import CaptchaConfig @@ -36,20 +37,41 @@ from .saml2_config import SAML2Config from .server import ServerConfig from .server_notices_config import ServerNoticesConfig from .spam_checker import SpamCheckerConfig +from .stats import StatsConfig from .tls import TlsConfig from .user_directory import UserDirectoryConfig from .voip import VoipConfig from .workers import WorkerConfig -class HomeServerConfig(ServerConfig, TlsConfig, DatabaseConfig, LoggingConfig, - RatelimitConfig, ContentRepositoryConfig, CaptchaConfig, - VoipConfig, RegistrationConfig, MetricsConfig, ApiConfig, - AppServiceConfig, KeyConfig, SAML2Config, CasConfig, - JWTConfig, PasswordConfig, EmailConfig, - WorkerConfig, PasswordAuthProviderConfig, PushConfig, - SpamCheckerConfig, GroupsConfig, UserDirectoryConfig, - ConsentConfig, - ServerNoticesConfig, RoomDirectoryConfig, - ): +class HomeServerConfig( + ServerConfig, + TlsConfig, + DatabaseConfig, + LoggingConfig, + RatelimitConfig, + ContentRepositoryConfig, + CaptchaConfig, + VoipConfig, + RegistrationConfig, + MetricsConfig, + ApiConfig, + AppServiceConfig, + KeyConfig, + SAML2Config, + CasConfig, + JWTConfig, + PasswordConfig, + EmailConfig, + WorkerConfig, + PasswordAuthProviderConfig, + PushConfig, + SpamCheckerConfig, + GroupsConfig, + UserDirectoryConfig, + ConsentConfig, + StatsConfig, + ServerNoticesConfig, + RoomDirectoryConfig, +): pass diff --git a/synapse/config/stats.py b/synapse/config/stats.py new file mode 100644 index 0000000000..80fc1b9dd0 --- /dev/null +++ b/synapse/config/stats.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division + +import sys + +from ._base import Config + + +class StatsConfig(Config): + """Stats Configuration + Configuration for the behaviour of synapse's stats engine + """ + + def read_config(self, config): + self.stats_enabled = True + self.stats_bucket_size = 86400 + self.stats_retention = sys.maxsize + stats_config = config.get("stats", None) + if stats_config: + self.stats_enabled = stats_config.get("enabled", self.stats_enabled) + self.stats_bucket_size = ( + self.parse_duration(stats_config.get("bucket_size", "1d")) / 1000 + ) + self.stats_retention = ( + self.parse_duration( + stats_config.get("retention", "%ds" % (sys.maxsize,)) + ) + / 1000 + ) + + def default_config(self, config_dir_path, server_name, **kwargs): + return """ + # Local statistics collection. Used in populating the room directory. + # + # 'bucket_size' controls how large each statistics timeslice is. It can + # be defined in a human readable short form -- e.g. "1d", "1y". + # + # 'retention' controls how long historical statistics will be kept for. + # It can be defined in a human readable short form -- e.g. "1d", "1y". + # + # + #stats: + # enabled: true + # bucket_size: 1d + # retention: 1y + """ diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py new file mode 100644 index 0000000000..0e92b405ba --- /dev/null +++ b/synapse/handlers/stats.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.handlers.state_deltas import StateDeltasHandler +from synapse.metrics import event_processing_positions +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.types import UserID +from synapse.util.metrics import Measure + +logger = logging.getLogger(__name__) + + +class StatsHandler(StateDeltasHandler): + """Handles keeping the *_stats tables updated with a simple time-series of + information about the users, rooms and media on the server, such that admins + have some idea of who is consuming their resources. + + Heavily derived from UserDirectoryHandler + """ + + def __init__(self, hs): + super(StatsHandler, self).__init__(hs) + self.hs = hs + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.server_name = hs.hostname + self.clock = hs.get_clock() + self.notifier = hs.get_notifier() + self.is_mine_id = hs.is_mine_id + self.stats_bucket_size = hs.config.stats_bucket_size + + # The current position in the current_state_delta stream + self.pos = None + + # Guard to ensure we only process deltas one at a time + self._is_processing = False + + if hs.config.stats_enabled: + self.notifier.add_replication_callback(self.notify_new_event) + + # We kick this off so that we don't have to wait for a change before + # we start populating stats + self.clock.call_later(0, self.notify_new_event) + + def notify_new_event(self): + """Called when there may be more deltas to process + """ + if not self.hs.config.stats_enabled: + return + + if self._is_processing: + return + + @defer.inlineCallbacks + def process(): + try: + yield self._unsafe_process() + finally: + self._is_processing = False + + self._is_processing = True + run_as_background_process("stats.notify_new_event", process) + + @defer.inlineCallbacks + def _unsafe_process(self): + # If self.pos is None then means we haven't fetched it from DB + if self.pos is None: + self.pos = yield self.store.get_stats_stream_pos() + + # If still None then the initial background update hasn't happened yet + if self.pos is None: + defer.returnValue(None) + + # Loop round handling deltas until we're up to date + while True: + with Measure(self.clock, "stats_delta"): + deltas = yield self.store.get_current_state_deltas(self.pos) + if not deltas: + return + + logger.info("Handling %d state deltas", len(deltas)) + yield self._handle_deltas(deltas) + + self.pos = deltas[-1]["stream_id"] + yield self.store.update_stats_stream_pos(self.pos) + + event_processing_positions.labels("stats").set(self.pos) + + @defer.inlineCallbacks + def _handle_deltas(self, deltas): + """ + Called with the state deltas to process + """ + for delta in deltas: + typ = delta["type"] + state_key = delta["state_key"] + room_id = delta["room_id"] + event_id = delta["event_id"] + stream_id = delta["stream_id"] + prev_event_id = delta["prev_event_id"] + + logger.debug("Handling: %r %r, %s", typ, state_key, event_id) + + token = yield self.store.get_earliest_token_for_room_stats(room_id) + + # If the earliest token to begin from is larger than our current + # stream ID, skip processing this delta. + if token is not None and token >= stream_id: + logger.debug( + "Ignoring: %s as earlier than this room's initial ingestion event", + event_id, + ) + continue + + if event_id is None and prev_event_id is None: + # Errr... + continue + + event_content = {} + + if event_id is not None: + event_content = (yield self.store.get_event(event_id)).content or {} + + # quantise time to the nearest bucket + now = yield self.store.get_received_ts(event_id) + now = (now // 1000 // self.stats_bucket_size) * self.stats_bucket_size + + if typ == EventTypes.Member: + # we could use _get_key_change here but it's a bit inefficient + # given we're not testing for a specific result; might as well + # just grab the prev_membership and membership strings and + # compare them. + prev_event_content = {} + if prev_event_id is not None: + prev_event_content = ( + yield self.store.get_event(prev_event_id) + ).content + + membership = event_content.get("membership", Membership.LEAVE) + prev_membership = prev_event_content.get("membership", Membership.LEAVE) + + if prev_membership == membership: + continue + + if prev_membership == Membership.JOIN: + yield self.store.update_stats_delta( + now, "room", room_id, "joined_members", -1 + ) + elif prev_membership == Membership.INVITE: + yield self.store.update_stats_delta( + now, "room", room_id, "invited_members", -1 + ) + elif prev_membership == Membership.LEAVE: + yield self.store.update_stats_delta( + now, "room", room_id, "left_members", -1 + ) + elif prev_membership == Membership.BAN: + yield self.store.update_stats_delta( + now, "room", room_id, "banned_members", -1 + ) + else: + err = "%s is not a valid prev_membership" % (repr(prev_membership),) + logger.error(err) + raise ValueError(err) + + if membership == Membership.JOIN: + yield self.store.update_stats_delta( + now, "room", room_id, "joined_members", +1 + ) + elif membership == Membership.INVITE: + yield self.store.update_stats_delta( + now, "room", room_id, "invited_members", +1 + ) + elif membership == Membership.LEAVE: + yield self.store.update_stats_delta( + now, "room", room_id, "left_members", +1 + ) + elif membership == Membership.BAN: + yield self.store.update_stats_delta( + now, "room", room_id, "banned_members", +1 + ) + else: + err = "%s is not a valid membership" % (repr(membership),) + logger.error(err) + raise ValueError(err) + + user_id = state_key + if self.is_mine_id(user_id): + # update user_stats as it's one of our users + public = yield self._is_public_room(room_id) + + if membership == Membership.LEAVE: + yield self.store.update_stats_delta( + now, + "user", + user_id, + "public_rooms" if public else "private_rooms", + -1, + ) + elif membership == Membership.JOIN: + yield self.store.update_stats_delta( + now, + "user", + user_id, + "public_rooms" if public else "private_rooms", + +1, + ) + + elif typ == EventTypes.Create: + # Newly created room. Add it with all blank portions. + yield self.store.update_room_state( + room_id, + { + "join_rules": None, + "history_visibility": None, + "encryption": None, + "name": None, + "topic": None, + "avatar": None, + "canonical_alias": None, + }, + ) + + elif typ == EventTypes.JoinRules: + yield self.store.update_room_state( + room_id, {"join_rules": event_content.get("join_rule")} + ) + + is_public = yield self._get_key_change( + prev_event_id, event_id, "join_rule", JoinRules.PUBLIC + ) + if is_public is not None: + yield self.update_public_room_stats(now, room_id, is_public) + + elif typ == EventTypes.RoomHistoryVisibility: + yield self.store.update_room_state( + room_id, + {"history_visibility": event_content.get("history_visibility")}, + ) + + is_public = yield self._get_key_change( + prev_event_id, event_id, "history_visibility", "world_readable" + ) + if is_public is not None: + yield self.update_public_room_stats(now, room_id, is_public) + + elif typ == EventTypes.Encryption: + yield self.store.update_room_state( + room_id, {"encryption": event_content.get("algorithm")} + ) + elif typ == EventTypes.Name: + yield self.store.update_room_state( + room_id, {"name": event_content.get("name")} + ) + elif typ == EventTypes.Topic: + yield self.store.update_room_state( + room_id, {"topic": event_content.get("topic")} + ) + elif typ == EventTypes.RoomAvatar: + yield self.store.update_room_state( + room_id, {"avatar": event_content.get("url")} + ) + elif typ == EventTypes.CanonicalAlias: + yield self.store.update_room_state( + room_id, {"canonical_alias": event_content.get("alias")} + ) + + @defer.inlineCallbacks + def update_public_room_stats(self, ts, room_id, is_public): + """ + Increment/decrement a user's number of public rooms when a room they are + in changes to/from public visibility. + + Args: + ts (int): Timestamp in seconds + room_id (str) + is_public (bool) + """ + # For now, blindly iterate over all local users in the room so that + # we can handle the whole problem of copying buckets over as needed + user_ids = yield self.store.get_users_in_room(room_id) + + for user_id in user_ids: + if self.hs.is_mine(UserID.from_string(user_id)): + yield self.store.update_stats_delta( + ts, "user", user_id, "public_rooms", +1 if is_public else -1 + ) + yield self.store.update_stats_delta( + ts, "user", user_id, "private_rooms", -1 if is_public else +1 + ) + + @defer.inlineCallbacks + def _is_public_room(self, room_id): + join_rules = yield self.state.get_current_state(room_id, EventTypes.JoinRules) + history_visibility = yield self.state.get_current_state( + room_id, EventTypes.RoomHistoryVisibility + ) + + if (join_rules and join_rules.content.get("join_rule") == JoinRules.PUBLIC) or ( + ( + history_visibility + and history_visibility.content.get("history_visibility") + == "world_readable" + ) + ): + defer.returnValue(True) + else: + defer.returnValue(False) diff --git a/synapse/server.py b/synapse/server.py index 80d40b9272..9229a68a8d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -72,6 +72,7 @@ from synapse.handlers.room_list import RoomListHandler from synapse.handlers.room_member import RoomMemberMasterHandler from synapse.handlers.room_member_worker import RoomMemberWorkerHandler from synapse.handlers.set_password import SetPasswordHandler +from synapse.handlers.stats import StatsHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler from synapse.handlers.user_directory import UserDirectoryHandler @@ -139,6 +140,7 @@ class HomeServer(object): 'acme_handler', 'auth_handler', 'device_handler', + 'stats_handler', 'e2e_keys_handler', 'e2e_room_keys_handler', 'event_handler', @@ -191,6 +193,7 @@ class HomeServer(object): REQUIRED_ON_MASTER_STARTUP = [ "user_directory_handler", + "stats_handler" ] # This is overridden in derived application classes @@ -474,6 +477,9 @@ class HomeServer(object): def build_secrets(self): return Secrets() + def build_stats_handler(self): + return StatsHandler(self) + def build_spam_checker(self): return SpamChecker(self) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 7522d3fd57..66675d08ae 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -55,6 +55,7 @@ from .roommember import RoomMemberStore from .search import SearchStore from .signatures import SignatureStore from .state import StateStore +from .stats import StatsStore from .stream import StreamStore from .tags import TagsStore from .transactions import TransactionStore @@ -100,6 +101,7 @@ class DataStore( GroupServerStore, UserErasureStore, MonthlyActiveUsersStore, + StatsStore, RelationsStore, ): def __init__(self, db_conn, hs): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index adc6cf26b5..83ffae2132 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -611,3 +611,27 @@ class EventsWorkerStore(SQLBaseStore): return res return self.runInteraction("get_rejection_reasons", f) + + def _get_total_state_event_counts_txn(self, txn, room_id): + """ + See get_state_event_counts. + """ + sql = "SELECT COUNT(*) FROM state_events WHERE room_id=?" + txn.execute(sql, (room_id,)) + row = txn.fetchone() + return row[0] if row else 0 + + def get_total_state_event_counts(self, room_id): + """ + Gets the total number of state events in a room. + + Args: + room_id (str) + + Returns: + Deferred[int] + """ + return self.runInteraction( + "get_total_state_event_counts", + self._get_total_state_event_counts_txn, room_id + ) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 57df17bcc2..4bd1669458 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -142,6 +142,38 @@ class RoomMemberWorkerStore(EventsWorkerStore): return self.runInteraction("get_room_summary", _get_room_summary_txn) + def _get_user_count_in_room_txn(self, txn, room_id, membership): + """ + See get_user_count_in_room. + """ + sql = ( + "SELECT count(*) FROM room_memberships as m" + " INNER JOIN current_state_events as c" + " ON m.event_id = c.event_id " + " AND m.room_id = c.room_id " + " AND m.user_id = c.state_key" + " WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?" + ) + + txn.execute(sql, (room_id, membership)) + row = txn.fetchone() + return row[0] + + def get_user_count_in_room(self, room_id, membership): + """ + Get the user count in a room with a particular membership. + + Args: + room_id (str) + membership (Membership) + + Returns: + Deferred[int] + """ + return self.runInteraction( + "get_users_in_room", self._get_user_count_in_room_txn, room_id, membership + ) + @cached() def get_invited_rooms_for_user(self, user_id): """ Get all the rooms the user is invited to diff --git a/synapse/storage/schema/delta/54/stats.sql b/synapse/storage/schema/delta/54/stats.sql new file mode 100644 index 0000000000..652e58308e --- /dev/null +++ b/synapse/storage/schema/delta/54/stats.sql @@ -0,0 +1,80 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE stats_stream_pos ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT, + CHECK (Lock='X') +); + +INSERT INTO stats_stream_pos (stream_id) VALUES (null); + +CREATE TABLE user_stats ( + user_id TEXT NOT NULL, + ts BIGINT NOT NULL, + bucket_size INT NOT NULL, + public_rooms INT NOT NULL, + private_rooms INT NOT NULL +); + +CREATE UNIQUE INDEX user_stats_user_ts ON user_stats(user_id, ts); + +CREATE TABLE room_stats ( + room_id TEXT NOT NULL, + ts BIGINT NOT NULL, + bucket_size INT NOT NULL, + current_state_events INT NOT NULL, + joined_members INT NOT NULL, + invited_members INT NOT NULL, + left_members INT NOT NULL, + banned_members INT NOT NULL, + state_events INT NOT NULL +); + +CREATE UNIQUE INDEX room_stats_room_ts ON room_stats(room_id, ts); + +-- cache of current room state; useful for the publicRooms list +CREATE TABLE room_state ( + room_id TEXT NOT NULL, + join_rules TEXT, + history_visibility TEXT, + encryption TEXT, + name TEXT, + topic TEXT, + avatar TEXT, + canonical_alias TEXT + -- get aliases straight from the right table +); + +CREATE UNIQUE INDEX room_state_room ON room_state(room_id); + +CREATE TABLE room_stats_earliest_token ( + room_id TEXT NOT NULL, + token BIGINT NOT NULL +); + +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id); + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_stats_createtables', '{}'); + +-- Run through each room and update stats +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_rooms', '{}', 'populate_stats_createtables'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_cleanup', '{}', 'populate_stats_process_rooms'); diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py index 31a0279b18..5fdb442104 100644 --- a/synapse/storage/state_deltas.py +++ b/synapse/storage/state_deltas.py @@ -84,10 +84,16 @@ class StateDeltasStore(SQLBaseStore): "get_current_state_deltas", get_current_state_deltas_txn ) - def get_max_stream_id_in_current_state_deltas(self): - return self._simple_select_one_onecol( + def _get_max_stream_id_in_current_state_deltas_txn(self, txn): + return self._simple_select_one_onecol_txn( + txn, table="current_state_delta_stream", keyvalues={}, retcol="COALESCE(MAX(stream_id), -1)", - desc="get_max_stream_id_in_current_state_deltas", + ) + + def get_max_stream_id_in_current_state_deltas(self): + return self.runInteraction( + "get_max_stream_id_in_current_state_deltas", + self._get_max_stream_id_in_current_state_deltas_txn, ) diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py new file mode 100644 index 0000000000..71b80a891d --- /dev/null +++ b/synapse/storage/stats.py @@ -0,0 +1,450 @@ +# -*- coding: utf-8 -*- +# Copyright 2018, 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, Membership +from synapse.storage.state_deltas import StateDeltasStore +from synapse.util.caches.descriptors import cached + +logger = logging.getLogger(__name__) + +# these fields track absolutes (e.g. total number of rooms on the server) +ABSOLUTE_STATS_FIELDS = { + "room": ( + "current_state_events", + "joined_members", + "invited_members", + "left_members", + "banned_members", + "state_events", + ), + "user": ("public_rooms", "private_rooms"), +} + +TYPE_TO_ROOM = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")} + +TEMP_TABLE = "_temp_populate_stats" + + +class StatsStore(StateDeltasStore): + def __init__(self, db_conn, hs): + super(StatsStore, self).__init__(db_conn, hs) + + self.server_name = hs.hostname + self.clock = self.hs.get_clock() + self.stats_enabled = hs.config.stats_enabled + self.stats_bucket_size = hs.config.stats_bucket_size + + self.register_background_update_handler( + "populate_stats_createtables", self._populate_stats_createtables + ) + self.register_background_update_handler( + "populate_stats_process_rooms", self._populate_stats_process_rooms + ) + self.register_background_update_handler( + "populate_stats_cleanup", self._populate_stats_cleanup + ) + + @defer.inlineCallbacks + def _populate_stats_createtables(self, progress, batch_size): + + if not self.stats_enabled: + yield self._end_background_update("populate_stats_createtables") + defer.returnValue(1) + + # Get all the rooms that we want to process. + def _make_staging_area(txn): + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" + ) + txn.execute(sql) + + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_position(position TEXT NOT NULL)" + ) + txn.execute(sql) + + # Get rooms we want to process from the database + sql = """ + SELECT room_id, count(*) FROM current_state_events + GROUP BY room_id + """ + txn.execute(sql) + rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] + self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) + del rooms + + new_pos = yield self.get_max_stream_id_in_current_state_deltas() + yield self.runInteraction("populate_stats_temp_build", _make_staging_area) + yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos}) + self.get_earliest_token_for_room_stats.invalidate_all() + + yield self._end_background_update("populate_stats_createtables") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_stats_cleanup(self, progress, batch_size): + """ + Update the user directory stream position, then clean up the old tables. + """ + if not self.stats_enabled: + yield self._end_background_update("populate_stats_cleanup") + defer.returnValue(1) + + position = yield self._simple_select_one_onecol( + TEMP_TABLE + "_position", None, "position" + ) + yield self.update_stats_stream_pos(position) + + def _delete_staging_area(txn): + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position") + + yield self.runInteraction("populate_stats_cleanup", _delete_staging_area) + + yield self._end_background_update("populate_stats_cleanup") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_stats_process_rooms(self, progress, batch_size): + + if not self.stats_enabled: + yield self._end_background_update("populate_stats_process_rooms") + defer.returnValue(1) + + # If we don't have progress filed, delete everything. + if not progress: + yield self.delete_all_stats() + + def _get_next_batch(txn): + # Only fetch 250 rooms, so we don't fetch too many at once, even + # if those 250 rooms have less than batch_size state events. + sql = """ + SELECT room_id, events FROM %s_rooms + ORDER BY events DESC + LIMIT 250 + """ % ( + TEMP_TABLE, + ) + txn.execute(sql) + rooms_to_work_on = txn.fetchall() + + if not rooms_to_work_on: + return None + + # Get how many are left to process, so we can give status on how + # far we are in processing + txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") + progress["remaining"] = txn.fetchone()[0] + + return rooms_to_work_on + + rooms_to_work_on = yield self.runInteraction( + "populate_stats_temp_read", _get_next_batch + ) + + # No more rooms -- complete the transaction. + if not rooms_to_work_on: + yield self._end_background_update("populate_stats_process_rooms") + defer.returnValue(1) + + logger.info( + "Processing the next %d rooms of %d remaining", + (len(rooms_to_work_on), progress["remaining"]), + ) + + # Number of state events we've processed by going through each room + processed_event_count = 0 + + for room_id, event_count in rooms_to_work_on: + + current_state_ids = yield self.get_current_state_ids(room_id) + + join_rules = yield self.get_event( + current_state_ids.get((EventTypes.JoinRules, "")), allow_none=True + ) + history_visibility = yield self.get_event( + current_state_ids.get((EventTypes.RoomHistoryVisibility, "")), + allow_none=True, + ) + encryption = yield self.get_event( + current_state_ids.get((EventTypes.RoomEncryption, "")), allow_none=True + ) + name = yield self.get_event( + current_state_ids.get((EventTypes.Name, "")), allow_none=True + ) + topic = yield self.get_event( + current_state_ids.get((EventTypes.Topic, "")), allow_none=True + ) + avatar = yield self.get_event( + current_state_ids.get((EventTypes.RoomAvatar, "")), allow_none=True + ) + canonical_alias = yield self.get_event( + current_state_ids.get((EventTypes.CanonicalAlias, "")), allow_none=True + ) + + def _or_none(x, arg): + if x: + return x.content.get(arg) + return None + + yield self.update_room_state( + room_id, + { + "join_rules": _or_none(join_rules, "join_rule"), + "history_visibility": _or_none( + history_visibility, "history_visibility" + ), + "encryption": _or_none(encryption, "algorithm"), + "name": _or_none(name, "name"), + "topic": _or_none(topic, "topic"), + "avatar": _or_none(avatar, "url"), + "canonical_alias": _or_none(canonical_alias, "alias"), + }, + ) + + now = self.hs.get_reactor().seconds() + + # quantise time to the nearest bucket + now = (now // self.stats_bucket_size) * self.stats_bucket_size + + def _fetch_data(txn): + + # Get the current token of the room + current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn) + + current_state_events = len(current_state_ids) + joined_members = self._get_user_count_in_room_txn( + txn, room_id, Membership.JOIN + ) + invited_members = self._get_user_count_in_room_txn( + txn, room_id, Membership.INVITE + ) + left_members = self._get_user_count_in_room_txn( + txn, room_id, Membership.LEAVE + ) + banned_members = self._get_user_count_in_room_txn( + txn, room_id, Membership.BAN + ) + total_state_events = self._get_total_state_event_counts_txn( + txn, room_id + ) + + self._update_stats_txn( + txn, + "room", + room_id, + now, + { + "bucket_size": self.stats_bucket_size, + "current_state_events": current_state_events, + "joined_members": joined_members, + "invited_members": invited_members, + "left_members": left_members, + "banned_members": banned_members, + "state_events": total_state_events, + }, + ) + self._simple_insert_txn( + txn, + "room_stats_earliest_token", + {"room_id": room_id, "token": current_token}, + ) + + yield self.runInteraction("update_room_stats", _fetch_data) + + # We've finished a room. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_stats", + self._background_update_progress_txn, + "populate_stats_process_rooms", + progress, + ) + + processed_event_count += event_count + + if processed_event_count > batch_size: + # Don't process any more rooms, we've hit our batch size. + defer.returnValue(processed_event_count) + + defer.returnValue(processed_event_count) + + def delete_all_stats(self): + """ + Delete all statistics records. + """ + + def _delete_all_stats_txn(txn): + txn.execute("DELETE FROM room_state") + txn.execute("DELETE FROM room_stats") + txn.execute("DELETE FROM room_stats_earliest_token") + txn.execute("DELETE FROM user_stats") + + return self.runInteraction("delete_all_stats", _delete_all_stats_txn) + + def get_stats_stream_pos(self): + return self._simple_select_one_onecol( + table="stats_stream_pos", + keyvalues={}, + retcol="stream_id", + desc="stats_stream_pos", + ) + + def update_stats_stream_pos(self, stream_id): + return self._simple_update_one( + table="stats_stream_pos", + keyvalues={}, + updatevalues={"stream_id": stream_id}, + desc="update_stats_stream_pos", + ) + + def update_room_state(self, room_id, fields): + """ + Args: + room_id (str) + fields (dict[str:Any]) + """ + return self._simple_upsert( + table="room_state", + keyvalues={"room_id": room_id}, + values=fields, + desc="update_room_state", + ) + + def get_deltas_for_room(self, room_id, start, size=100): + """ + Get statistics deltas for a given room. + + Args: + room_id (str) + start (int): Pagination start. Number of entries, not timestamp. + size (int): How many entries to return. + + Returns: + Deferred[list[dict]], where the dict has the keys of + ABSOLUTE_STATS_FIELDS["room"] and "ts". + """ + return self._simple_select_list_paginate( + "room_stats", + {"room_id": room_id}, + "ts", + start, + size, + retcols=(list(ABSOLUTE_STATS_FIELDS["room"]) + ["ts"]), + order_direction="DESC", + ) + + def get_all_room_state(self): + return self._simple_select_list( + "room_state", None, retcols=("name", "topic", "canonical_alias") + ) + + @cached() + def get_earliest_token_for_room_stats(self, room_id): + """ + Fetch the "earliest token". This is used by the room stats delta + processor to ignore deltas that have been processed between the + start of the background task and any particular room's stats + being calculated. + + Returns: + Deferred[int] + """ + return self._simple_select_one_onecol( + "room_stats_earliest_token", + {"room_id": room_id}, + retcol="token", + allow_none=True, + ) + + def update_stats(self, stats_type, stats_id, ts, fields): + table, id_col = TYPE_TO_ROOM[stats_type] + return self._simple_upsert( + table=table, + keyvalues={id_col: stats_id, "ts": ts}, + values=fields, + desc="update_stats", + ) + + def _update_stats_txn(self, txn, stats_type, stats_id, ts, fields): + table, id_col = TYPE_TO_ROOM[stats_type] + return self._simple_upsert_txn( + txn, table=table, keyvalues={id_col: stats_id, "ts": ts}, values=fields + ) + + def update_stats_delta(self, ts, stats_type, stats_id, field, value): + def _update_stats_delta(txn): + table, id_col = TYPE_TO_ROOM[stats_type] + + sql = ( + "SELECT * FROM %s" + " WHERE %s=? and ts=(" + " SELECT MAX(ts) FROM %s" + " WHERE %s=?" + ")" + ) % (table, id_col, table, id_col) + txn.execute(sql, (stats_id, stats_id)) + rows = self.cursor_to_dict(txn) + if len(rows) == 0: + # silently skip as we don't have anything to apply a delta to yet. + # this tries to minimise any race between the initial sync and + # subsequent deltas arriving. + return + + current_ts = ts + latest_ts = rows[0]["ts"] + if current_ts < latest_ts: + # This one is in the past, but we're just encountering it now. + # Mark it as part of the current bucket. + current_ts = latest_ts + elif ts != latest_ts: + # we have to copy our absolute counters over to the new entry. + values = { + key: rows[0][key] for key in ABSOLUTE_STATS_FIELDS[stats_type] + } + values[id_col] = stats_id + values["ts"] = ts + values["bucket_size"] = self.stats_bucket_size + + self._simple_insert_txn(txn, table=table, values=values) + + # actually update the new value + if stats_type in ABSOLUTE_STATS_FIELDS[stats_type]: + self._simple_update_txn( + txn, + table=table, + keyvalues={id_col: stats_id, "ts": current_ts}, + updatevalues={field: value}, + ) + else: + sql = ("UPDATE %s SET %s=%s+? WHERE %s=? AND ts=?") % ( + table, + field, + field, + id_col, + ) + txn.execute(sql, (value, stats_id, current_ts)) + + return self.runInteraction("update_stats_delta", _update_stats_delta) diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py new file mode 100644 index 0000000000..249aba3d59 --- /dev/null +++ b/tests/handlers/test_stats.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from mock import Mock + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, Membership +from synapse.rest import admin +from synapse.rest.client.v1 import login, room + +from tests import unittest + + +class StatsRoomTests(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + + self.store = hs.get_datastore() + self.handler = self.hs.get_stats_handler() + + def _add_background_updates(self): + """ + Add the background updates we need to run. + """ + # Ugh, have to reset this flag + self.store._all_done = False + + self.get_success( + self.store._simple_insert( + "background_updates", + {"update_name": "populate_stats_createtables", "progress_json": "{}"}, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_stats_process_rooms", + "progress_json": "{}", + "depends_on": "populate_stats_createtables", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_stats_cleanup", + "progress_json": "{}", + "depends_on": "populate_stats_process_rooms", + }, + ) + ) + + def test_initial_room(self): + """ + The background updates will build the table from scratch. + """ + r = self.get_success(self.store.get_all_room_state()) + self.assertEqual(len(r), 0) + + # Disable stats + self.hs.config.stats_enabled = False + self.handler.stats_enabled = False + + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + self.helper.send_state( + room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token + ) + + # Stats disabled, shouldn't have done anything + r = self.get_success(self.store.get_all_room_state()) + self.assertEqual(len(r), 0) + + # Enable stats + self.hs.config.stats_enabled = True + self.handler.stats_enabled = True + + # Do the initial population of the user directory via the background update + self._add_background_updates() + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + r = self.get_success(self.store.get_all_room_state()) + + self.assertEqual(len(r), 1) + self.assertEqual(r[0]["topic"], "foo") + + def test_initial_earliest_token(self): + """ + Ingestion via notify_new_event will ignore tokens that the background + update have already processed. + """ + self.reactor.advance(86401) + + self.hs.config.stats_enabled = False + self.handler.stats_enabled = False + + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + u2 = self.register_user("u2", "pass") + u2_token = self.login("u2", "pass") + + u3 = self.register_user("u3", "pass") + u3_token = self.login("u3", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + self.helper.send_state( + room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token + ) + + # Begin the ingestion by creating the temp tables. This will also store + # the position that the deltas should begin at, once they take over. + self.hs.config.stats_enabled = True + self.handler.stats_enabled = True + self.store._all_done = False + self.get_success(self.store.update_stats_stream_pos(None)) + + self.get_success( + self.store._simple_insert( + "background_updates", + {"update_name": "populate_stats_createtables", "progress_json": "{}"}, + ) + ) + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # Now, before the table is actually ingested, add some more events. + self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token) + self.helper.join(room=room_1, user=u2, tok=u2_token) + + # Now do the initial ingestion. + self.get_success( + self.store._simple_insert( + "background_updates", + {"update_name": "populate_stats_process_rooms", "progress_json": "{}"}, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_stats_cleanup", + "progress_json": "{}", + "depends_on": "populate_stats_process_rooms", + }, + ) + ) + + self.store._all_done = False + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + self.reactor.advance(86401) + + # Now add some more events, triggering ingestion. Because of the stream + # position being set to before the events sent in the middle, a simpler + # implementation would reprocess those events, and say there were four + # users, not three. + self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token) + self.helper.join(room=room_1, user=u3, tok=u3_token) + + # Get the deltas! There should be two -- day 1, and day 2. + r = self.get_success(self.store.get_deltas_for_room(room_1, 0)) + + # The oldest has 2 joined members + self.assertEqual(r[-1]["joined_members"], 2) + + # The newest has 3 + self.assertEqual(r[0]["joined_members"], 3) + + def test_incorrect_state_transition(self): + """ + If the state transition is not one of (JOIN, INVITE, LEAVE, BAN) to + (JOIN, INVITE, LEAVE, BAN), an error is raised. + """ + events = { + "a1": {"membership": Membership.LEAVE}, + "a2": {"membership": "not a real thing"}, + } + + def get_event(event_id): + m = Mock() + m.content = events[event_id] + d = defer.Deferred() + self.reactor.callLater(0.0, d.callback, m) + return d + + def get_received_ts(event_id): + return defer.succeed(1) + + self.store.get_received_ts = get_received_ts + self.store.get_event = get_event + + deltas = [ + { + "type": EventTypes.Member, + "state_key": "some_user", + "room_id": "room", + "event_id": "a1", + "prev_event_id": "a2", + "stream_id": "bleb", + } + ] + + f = self.get_failure(self.handler._handle_deltas(deltas), ValueError) + self.assertEqual( + f.value.args[0], "'not a real thing' is not a valid prev_membership" + ) + + # And the other way... + deltas = [ + { + "type": EventTypes.Member, + "state_key": "some_user", + "room_id": "room", + "event_id": "a2", + "prev_event_id": "a1", + "stream_id": "bleb", + } + ] + + f = self.get_failure(self.handler._handle_deltas(deltas), ValueError) + self.assertEqual( + f.value.args[0], "'not a real thing' is not a valid membership" + ) diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py index 05b0143c42..f7133fc12e 100644 --- a/tests/rest/client/v1/utils.py +++ b/tests/rest/client/v1/utils.py @@ -127,3 +127,20 @@ class RestHelper(object): ) return channel.json_body + + def send_state(self, room_id, event_type, body, tok, expect_code=200): + path = "/_matrix/client/r0/rooms/%s/state/%s" % (room_id, event_type) + if tok: + path = path + "?access_token=%s" % tok + + request, channel = make_request( + self.hs.get_reactor(), "PUT", path, json.dumps(body).encode('utf8') + ) + render(request, self.resource, self.hs.get_reactor()) + + assert int(channel.result["code"]) == expect_code, ( + "Expected: %d, got: %d, resp: %r" + % (expect_code, int(channel.result["code"]), channel.result["body"]) + ) + + return channel.json_body -- cgit 1.5.1 From 85d1e03b9d50c1c64d2742ef3ec4f2dcd2bf7f9f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 23 May 2019 11:17:42 +0100 Subject: Simplifications and comments in do_auth (#5227) I was staring at this function trying to figure out wtf it was actually doing. This is (hopefully) a non-functional refactor which makes it a bit clearer. --- changelog.d/5227.misc | 1 + synapse/handlers/federation.py | 301 +++++++++++++++++++++++---------------- synapse/storage/events_worker.py | 2 +- 3 files changed, 183 insertions(+), 121 deletions(-) create mode 100644 changelog.d/5227.misc (limited to 'synapse/storage') diff --git a/changelog.d/5227.misc b/changelog.d/5227.misc new file mode 100644 index 0000000000..32bd7b6009 --- /dev/null +++ b/changelog.d/5227.misc @@ -0,0 +1 @@ +Simplifications and comments in do_auth. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 2202ed699a..cf4fad7de0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2013,15 +2013,44 @@ class FederationHandler(BaseHandler): Args: origin (str): - event (synapse.events.FrozenEvent): + event (synapse.events.EventBase): context (synapse.events.snapshot.EventContext): - auth_events (dict[(str, str)->str]): + auth_events (dict[(str, str)->synapse.events.EventBase]): + Map from (event_type, state_key) to event + + What we expect the event's auth_events to be, based on the event's + position in the dag. I think? maybe?? + + Also NB that this function adds entries to it. + Returns: + defer.Deferred[None] + """ + room_version = yield self.store.get_room_version(event.room_id) + + yield self._update_auth_events_and_context_for_auth( + origin, event, context, auth_events + ) + try: + self.auth.check(room_version, event, auth_events=auth_events) + except AuthError as e: + logger.warn("Failed auth resolution for %r because %s", event, e) + raise e + + @defer.inlineCallbacks + def _update_auth_events_and_context_for_auth( + self, origin, event, context, auth_events + ): + """Helper for do_auth. See there for docs. + + Args: + origin (str): + event (synapse.events.EventBase): + context (synapse.events.snapshot.EventContext): + auth_events (dict[(str, str)->synapse.events.EventBase]): Returns: defer.Deferred[None] """ - # Check if we have all the auth events. - current_state = set(e.event_id for e in auth_events.values()) event_auth_events = set(event.auth_event_ids()) if event.is_state(): @@ -2029,11 +2058,21 @@ class FederationHandler(BaseHandler): else: event_key = None - if event_auth_events - current_state: + # if the event's auth_events refers to events which are not in our + # calculated auth_events, we need to fetch those events from somewhere. + # + # we start by fetching them from the store, and then try calling /event_auth/. + missing_auth = event_auth_events.difference( + e.event_id for e in auth_events.values() + ) + + if missing_auth: # TODO: can we use store.have_seen_events here instead? have_events = yield self.store.get_seen_events_with_rejections( - event_auth_events - current_state + missing_auth ) + logger.debug("Got events %s from store", have_events) + missing_auth.difference_update(have_events.keys()) else: have_events = {} @@ -2042,13 +2081,12 @@ class FederationHandler(BaseHandler): for e in auth_events.values() }) - seen_events = set(have_events.keys()) - - missing_auth = event_auth_events - seen_events - current_state - if missing_auth: - logger.info("Missing auth: %s", missing_auth) # If we don't have all the auth events, we need to get them. + logger.info( + "auth_events contains unknown events: %s", + missing_auth, + ) try: remote_auth_chain = yield self.federation_client.get_event_auth( origin, event.room_id, event.event_id @@ -2089,145 +2127,168 @@ class FederationHandler(BaseHandler): have_events = yield self.store.get_seen_events_with_rejections( event.auth_event_ids() ) - seen_events = set(have_events.keys()) except Exception: # FIXME: logger.exception("Failed to get auth chain") + if event.internal_metadata.is_outlier(): + logger.info("Skipping auth_event fetch for outlier") + return + # FIXME: Assumes we have and stored all the state for all the # prev_events - current_state = set(e.event_id for e in auth_events.values()) - different_auth = event_auth_events - current_state + different_auth = event_auth_events.difference( + e.event_id for e in auth_events.values() + ) - room_version = yield self.store.get_room_version(event.room_id) + if not different_auth: + return - if different_auth and not event.internal_metadata.is_outlier(): - # Do auth conflict res. - logger.info("Different auth: %s", different_auth) - - different_events = yield logcontext.make_deferred_yieldable( - defer.gatherResults([ - logcontext.run_in_background( - self.store.get_event, - d, - allow_none=True, - allow_rejected=False, - ) - for d in different_auth - if d in have_events and not have_events[d] - ], consumeErrors=True) - ).addErrback(unwrapFirstError) - - if different_events: - local_view = dict(auth_events) - remote_view = dict(auth_events) - remote_view.update({ - (d.type, d.state_key): d for d in different_events if d - }) + logger.info( + "auth_events refers to events which are not in our calculated auth " + "chain: %s", + different_auth, + ) + + room_version = yield self.store.get_room_version(event.room_id) - new_state = yield self.state_handler.resolve_events( - room_version, - [list(local_view.values()), list(remote_view.values())], - event + different_events = yield logcontext.make_deferred_yieldable( + defer.gatherResults([ + logcontext.run_in_background( + self.store.get_event, + d, + allow_none=True, + allow_rejected=False, ) + for d in different_auth + if d in have_events and not have_events[d] + ], consumeErrors=True) + ).addErrback(unwrapFirstError) + + if different_events: + local_view = dict(auth_events) + remote_view = dict(auth_events) + remote_view.update({ + (d.type, d.state_key): d for d in different_events if d + }) - auth_events.update(new_state) + new_state = yield self.state_handler.resolve_events( + room_version, + [list(local_view.values()), list(remote_view.values())], + event + ) - current_state = set(e.event_id for e in auth_events.values()) - different_auth = event_auth_events - current_state + logger.info( + "After state res: updating auth_events with new state %s", + { + (d.type, d.state_key): d.event_id for d in new_state.values() + if auth_events.get((d.type, d.state_key)) != d + }, + ) - yield self._update_context_for_auth_events( - event, context, auth_events, event_key, - ) + auth_events.update(new_state) + + different_auth = event_auth_events.difference( + e.event_id for e in auth_events.values() + ) - if different_auth and not event.internal_metadata.is_outlier(): - logger.info("Different auth after resolution: %s", different_auth) + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, + ) - # Only do auth resolution if we have something new to say. - # We can't rove an auth failure. - do_resolution = False + if not different_auth: + # we're done + return - provable = [ - RejectedReason.NOT_ANCESTOR, RejectedReason.NOT_ANCESTOR, - ] + logger.info( + "auth_events still refers to events which are not in the calculated auth " + "chain after state resolution: %s", + different_auth, + ) - for e_id in different_auth: - if e_id in have_events: - if have_events[e_id] in provable: - do_resolution = True - break + # Only do auth resolution if we have something new to say. + # We can't prove an auth failure. + do_resolution = False - if do_resolution: - prev_state_ids = yield context.get_prev_state_ids(self.store) - # 1. Get what we think is the auth chain. - auth_ids = yield self.auth.compute_auth_events( - event, prev_state_ids - ) - local_auth_chain = yield self.store.get_auth_chain( - auth_ids, include_given=True - ) + for e_id in different_auth: + if e_id in have_events: + if have_events[e_id] == RejectedReason.NOT_ANCESTOR: + do_resolution = True + break - try: - # 2. Get remote difference. - result = yield self.federation_client.query_auth( - origin, - event.room_id, - event.event_id, - local_auth_chain, - ) + if not do_resolution: + logger.info( + "Skipping auth resolution due to lack of provable rejection reasons" + ) + return - seen_remotes = yield self.store.have_seen_events( - [e.event_id for e in result["auth_chain"]] - ) + logger.info("Doing auth resolution") - # 3. Process any remote auth chain events we haven't seen. - for ev in result["auth_chain"]: - if ev.event_id in seen_remotes: - continue + prev_state_ids = yield context.get_prev_state_ids(self.store) - if ev.event_id == event.event_id: - continue + # 1. Get what we think is the auth chain. + auth_ids = yield self.auth.compute_auth_events( + event, prev_state_ids + ) + local_auth_chain = yield self.store.get_auth_chain( + auth_ids, include_given=True + ) - try: - auth_ids = ev.auth_event_ids() - auth = { - (e.type, e.state_key): e - for e in result["auth_chain"] - if e.event_id in auth_ids - or event.type == EventTypes.Create - } - ev.internal_metadata.outlier = True + try: + # 2. Get remote difference. + result = yield self.federation_client.query_auth( + origin, + event.room_id, + event.event_id, + local_auth_chain, + ) - logger.debug( - "do_auth %s different_auth: %s", - event.event_id, e.event_id - ) + seen_remotes = yield self.store.have_seen_events( + [e.event_id for e in result["auth_chain"]] + ) - yield self._handle_new_event( - origin, ev, auth_events=auth - ) + # 3. Process any remote auth chain events we haven't seen. + for ev in result["auth_chain"]: + if ev.event_id in seen_remotes: + continue - if ev.event_id in event_auth_events: - auth_events[(ev.type, ev.state_key)] = ev - except AuthError: - pass + if ev.event_id == event.event_id: + continue - except Exception: - # FIXME: - logger.exception("Failed to query auth chain") + try: + auth_ids = ev.auth_event_ids() + auth = { + (e.type, e.state_key): e + for e in result["auth_chain"] + if e.event_id in auth_ids + or event.type == EventTypes.Create + } + ev.internal_metadata.outlier = True + + logger.debug( + "do_auth %s different_auth: %s", + event.event_id, e.event_id + ) - # 4. Look at rejects and their proofs. - # TODO. + yield self._handle_new_event( + origin, ev, auth_events=auth + ) - yield self._update_context_for_auth_events( - event, context, auth_events, event_key, - ) + if ev.event_id in event_auth_events: + auth_events[(ev.type, ev.state_key)] = ev + except AuthError: + pass - try: - self.auth.check(room_version, event, auth_events=auth_events) - except AuthError as e: - logger.warn("Failed auth resolution for %r because %s", event, e) - raise e + except Exception: + # FIXME: + logger.exception("Failed to query auth chain") + + # 4. Look at rejects and their proofs. + # TODO. + + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, + ) @defer.inlineCallbacks def _update_context_for_auth_events(self, event, context, auth_events, diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 83ffae2132..21b353cad3 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -610,7 +610,7 @@ class EventsWorkerStore(SQLBaseStore): return res - return self.runInteraction("get_rejection_reasons", f) + return self.runInteraction("get_seen_events_with_rejections", f) def _get_total_state_event_counts_txn(self, txn, room_id): """ -- cgit 1.5.1 From 2e052110ee0bca17b8e27b6b48ee8b7c64bc94ae Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 23 May 2019 11:45:39 +0100 Subject: Rewrite store_server_verify_key to store several keys at once (#5234) Storing server keys hammered the database a bit. This replaces the implementation which stored a single key, with one which can do many updates at once. --- changelog.d/5234.misc | 1 + synapse/crypto/keyring.py | 59 ++++++++++------------------------------ synapse/storage/keys.py | 65 ++++++++++++++++++++++++++------------------ tests/crypto/test_keyring.py | 14 ++++++++-- tests/storage/test_keys.py | 44 ++++++++++++++++++++---------- 5 files changed, 96 insertions(+), 87 deletions(-) create mode 100644 changelog.d/5234.misc (limited to 'synapse/storage') diff --git a/changelog.d/5234.misc b/changelog.d/5234.misc new file mode 100644 index 0000000000..43fbd6f67c --- /dev/null +++ b/changelog.d/5234.misc @@ -0,0 +1 @@ +Rewrite store_server_verify_key to store several keys at once. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 5cc98542ce..badb5254ea 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -453,10 +453,11 @@ class Keyring(object): raise_from(KeyLookupError("Remote server returned an error"), e) keys = {} + added_keys = [] - responses = query_response["server_keys"] + time_now_ms = self.clock.time_msec() - for response in responses: + for response in query_response["server_keys"]: if ( u"signatures" not in response or perspective_name not in response[u"signatures"] @@ -492,21 +493,13 @@ class Keyring(object): ) server_name = response["server_name"] + added_keys.extend( + (server_name, key_id, key) for key_id, key in processed_response.items() + ) keys.setdefault(server_name, {}).update(processed_response) - yield logcontext.make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self.store_keys, - server_name=server_name, - from_server=perspective_name, - verify_keys=response_keys, - ) - for server_name, response_keys in keys.items() - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + yield self.store.store_server_verify_keys( + perspective_name, time_now_ms, added_keys ) defer.returnValue(keys) @@ -519,6 +512,7 @@ class Keyring(object): if requested_key_id in keys: continue + time_now_ms = self.clock.time_msec() try: response = yield self.client.get_json( destination=server_name, @@ -548,12 +542,13 @@ class Keyring(object): requested_ids=[requested_key_id], response_json=response, ) - + yield self.store.store_server_verify_keys( + server_name, + time_now_ms, + ((server_name, key_id, key) for key_id, key in response_keys.items()), + ) keys.update(response_keys) - yield self.store_keys( - server_name=server_name, from_server=server_name, verify_keys=keys - ) defer.returnValue({server_name: keys}) @defer.inlineCallbacks @@ -650,32 +645,6 @@ class Keyring(object): defer.returnValue(response_keys) - def store_keys(self, server_name, from_server, verify_keys): - """Store a collection of verify keys for a given server - Args: - server_name(str): The name of the server the keys are for. - from_server(str): The server the keys were downloaded from. - verify_keys(dict): A mapping of key_id to VerifyKey. - Returns: - A deferred that completes when the keys are stored. - """ - # TODO(markjh): Store whether the keys have expired. - return logcontext.make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self.store.store_server_verify_key, - server_name, - server_name, - key.time_added, - key, - ) - for key_id, key in verify_keys.items() - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) - ) - @defer.inlineCallbacks def _handle_key_deferred(verify_request): diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 7036541792..3c5f52009b 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -84,38 +84,51 @@ class KeyStore(SQLBaseStore): return self.runInteraction("get_server_verify_keys", _txn) - def store_server_verify_key( - self, server_name, from_server, time_now_ms, verify_key - ): - """Stores a NACL verification key for the given server. + def store_server_verify_keys(self, from_server, ts_added_ms, verify_keys): + """Stores NACL verification keys for remote servers. Args: - server_name (str): The name of the server. - from_server (str): Where the verification key was looked up - time_now_ms (int): The time now in milliseconds - verify_key (nacl.signing.VerifyKey): The NACL verify key. + from_server (str): Where the verification keys were looked up + ts_added_ms (int): The time to record that the key was added + verify_keys (iterable[tuple[str, str, nacl.signing.VerifyKey]]): + keys to be stored. Each entry is a triplet of + (server_name, key_id, key). """ - key_id = "%s:%s" % (verify_key.alg, verify_key.version) - - # XXX fix this to not need a lock (#3819) - def _txn(txn): - self._simple_upsert_txn( - txn, - table="server_signature_keys", - keyvalues={"server_name": server_name, "key_id": key_id}, - values={ - "from_server": from_server, - "ts_added_ms": time_now_ms, - "verify_key": db_binary_type(verify_key.encode()), - }, + key_values = [] + value_values = [] + invalidations = [] + for server_name, key_id, verify_key in verify_keys: + key_values.append((server_name, key_id)) + value_values.append( + ( + from_server, + ts_added_ms, + db_binary_type(verify_key.encode()), + ) ) # invalidate takes a tuple corresponding to the params of # _get_server_verify_key. _get_server_verify_key only takes one # param, which is itself the 2-tuple (server_name, key_id). - txn.call_after( - self._get_server_verify_key.invalidate, ((server_name, key_id),) - ) - - return self.runInteraction("store_server_verify_key", _txn) + invalidations.append((server_name, key_id)) + + def _invalidate(res): + f = self._get_server_verify_key.invalidate + for i in invalidations: + f((i, )) + return res + + return self.runInteraction( + "store_server_verify_keys", + self._simple_upsert_many_txn, + table="server_signature_keys", + key_names=("server_name", "key_id"), + key_values=key_values, + value_names=( + "from_server", + "ts_added_ms", + "verify_key", + ), + value_values=value_values, + ).addCallback(_invalidate) def store_server_keys_json( self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 3c79d4afe7..bcffe53a91 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -192,8 +192,18 @@ class KeyringTestCase(unittest.HomeserverTestCase): kr = keyring.Keyring(self.hs) key1 = signedjson.key.generate_signing_key(1) - r = self.hs.datastore.store_server_verify_key( - "server9", "", time.time() * 1000, signedjson.key.get_verify_key(key1) + key1_id = "%s:%s" % (key1.alg, key1.version) + + r = self.hs.datastore.store_server_verify_keys( + "server9", + time.time() * 1000, + [ + ( + "server9", + key1_id, + signedjson.key.get_verify_key(key1), + ), + ], ) self.get_success(r) json1 = {} diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py index 6bfaa00fe9..71ad7aee32 100644 --- a/tests/storage/test_keys.py +++ b/tests/storage/test_keys.py @@ -31,23 +31,32 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): def test_get_server_verify_keys(self): store = self.hs.get_datastore() - d = store.store_server_verify_key("server1", "from_server", 0, KEY_1) - self.get_success(d) - d = store.store_server_verify_key("server1", "from_server", 0, KEY_2) + key_id_1 = "ed25519:key1" + key_id_2 = "ed25519:KEY_ID_2" + d = store.store_server_verify_keys( + "from_server", + 10, + [ + ("server1", key_id_1, KEY_1), + ("server1", key_id_2, KEY_2), + ], + ) self.get_success(d) d = store.get_server_verify_keys( - [ - ("server1", "ed25519:key1"), - ("server1", "ed25519:key2"), - ("server1", "ed25519:key3"), - ] + [("server1", key_id_1), ("server1", key_id_2), ("server1", "ed25519:key3")] ) res = self.get_success(d) self.assertEqual(len(res.keys()), 3) - self.assertEqual(res[("server1", "ed25519:key1")].version, "key1") - self.assertEqual(res[("server1", "ed25519:key2")].version, "key2") + res1 = res[("server1", key_id_1)] + self.assertEqual(res1, KEY_1) + self.assertEqual(res1.version, "key1") + + res2 = res[("server1", key_id_2)] + self.assertEqual(res2, KEY_2) + # version comes from the ID it was stored with + self.assertEqual(res2.version, "KEY_ID_2") # non-existent result gives None self.assertIsNone(res[("server1", "ed25519:key3")]) @@ -60,9 +69,14 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): key_id_1 = "ed25519:key1" key_id_2 = "ed25519:key2" - d = store.store_server_verify_key("srv1", "from_server", 0, KEY_1) - self.get_success(d) - d = store.store_server_verify_key("srv1", "from_server", 0, KEY_2) + d = store.store_server_verify_keys( + "from_server", + 0, + [ + ("srv1", key_id_1, KEY_1), + ("srv1", key_id_2, KEY_2), + ], + ) self.get_success(d) d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)]) @@ -81,7 +95,9 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): new_key_2 = signedjson.key.get_verify_key( signedjson.key.generate_signing_key("key2") ) - d = store.store_server_verify_key("srv1", "from_server", 10, new_key_2) + d = store.store_server_verify_keys( + "from_server", 10, [("srv1", key_id_2, new_key_2)] + ) self.get_success(d) d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)]) -- cgit 1.5.1 From b75537beaf841089f9f07c9dbed04a7a420a8b1f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 3 Apr 2019 18:10:24 +0100 Subject: Store key validity time in the storage layer This is a first step to checking that the key is valid at the required moment. The idea here is that, rather than passing VerifyKey objects in and out of the storage layer, we instead pass FetchKeyResult objects, which simply wrap the VerifyKey and add a valid_until_ts field. --- changelog.d/5237.misc | 1 + synapse/crypto/keyring.py | 47 +++++++++++++++------- synapse/storage/keys.py | 31 +++++++++----- .../delta/54/add_validity_to_server_keys.sql | 23 +++++++++++ tests/crypto/test_keyring.py | 22 ++++++---- tests/storage/test_keys.py | 44 +++++++++++++------- 6 files changed, 122 insertions(+), 46 deletions(-) create mode 100644 changelog.d/5237.misc create mode 100644 synapse/storage/schema/delta/54/add_validity_to_server_keys.sql (limited to 'synapse/storage') diff --git a/changelog.d/5237.misc b/changelog.d/5237.misc new file mode 100644 index 0000000000..f4fe3b821b --- /dev/null +++ b/changelog.d/5237.misc @@ -0,0 +1 @@ +Store key validity time in the storage layer. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 9d629b2238..14a27288fd 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -20,7 +20,6 @@ from collections import namedtuple from six import raise_from from six.moves import urllib -import nacl.signing from signedjson.key import ( decode_verify_key_bytes, encode_verify_key_base64, @@ -43,6 +42,7 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) +from synapse.storage.keys import FetchKeyResult from synapse.util import logcontext, unwrapFirstError from synapse.util.logcontext import ( LoggingContext, @@ -307,11 +307,15 @@ class Keyring(object): # complete this VerifyKeyRequest. result_keys = results.get(server_name, {}) for key_id in verify_request.key_ids: - key = result_keys.get(key_id) - if key: + fetch_key_result = result_keys.get(key_id) + if fetch_key_result: with PreserveLoggingContext(): verify_request.deferred.callback( - (server_name, key_id, key) + ( + server_name, + key_id, + fetch_key_result.verify_key, + ) ) break else: @@ -348,12 +352,12 @@ class Keyring(object): def get_keys_from_store(self, server_name_and_key_ids): """ Args: - server_name_and_key_ids (iterable(Tuple[str, iterable[str]]): + server_name_and_key_ids (iterable[Tuple[str, iterable[str]]]): list of (server_name, iterable[key_id]) tuples to fetch keys for Returns: - Deferred: resolves to dict[str, dict[str, VerifyKey|None]]: map from - server_name -> key_id -> VerifyKey + Deferred[dict[str, dict[str, synapse.storage.keys.FetchKeyResult|None]]]: + map from server_name -> key_id -> FetchKeyResult """ keys_to_fetch = ( (server_name, key_id) @@ -430,6 +434,18 @@ class Keyring(object): def get_server_verify_key_v2_indirect( self, server_names_and_key_ids, perspective_name, perspective_keys ): + """ + Args: + server_names_and_key_ids (iterable[Tuple[str, iterable[str]]]): + list of (server_name, iterable[key_id]) tuples to fetch keys for + perspective_name (str): name of the notary server to query for the keys + perspective_keys (dict[str, VerifyKey]): map of key_id->key for the + notary server + + Returns: + Deferred[dict[str, dict[str, synapse.storage.keys.FetchKeyResult]]]: map + from server_name -> key_id -> FetchKeyResult + """ # TODO(mark): Set the minimum_valid_until_ts to that needed by # the events being validated or the current time if validating # an incoming request. @@ -506,7 +522,7 @@ class Keyring(object): @defer.inlineCallbacks def get_server_verify_key_v2_direct(self, server_name, key_ids): - keys = {} # type: dict[str, nacl.signing.VerifyKey] + keys = {} # type: dict[str, FetchKeyResult] for requested_key_id in key_ids: if requested_key_id in keys: @@ -583,9 +599,9 @@ class Keyring(object): actually in the response Returns: - Deferred[dict[str, nacl.signing.VerifyKey]]: - map from key_id to key object + Deferred[dict[str, FetchKeyResult]]: map from key_id to result object """ + ts_valid_until_ms = response_json[u"valid_until_ts"] # start by extracting the keys from the response, since they may be required # to validate the signature on the response. @@ -595,7 +611,9 @@ class Keyring(object): key_base64 = key_data["key"] key_bytes = decode_base64(key_base64) verify_key = decode_verify_key_bytes(key_id, key_bytes) - verify_keys[key_id] = verify_key + verify_keys[key_id] = FetchKeyResult( + verify_key=verify_key, valid_until_ts=ts_valid_until_ms + ) # TODO: improve this signature checking server_name = response_json["server_name"] @@ -606,7 +624,7 @@ class Keyring(object): ) verify_signed_json( - response_json, server_name, verify_keys[key_id] + response_json, server_name, verify_keys[key_id].verify_key ) for key_id, key_data in response_json["old_verify_keys"].items(): @@ -614,7 +632,9 @@ class Keyring(object): key_base64 = key_data["key"] key_bytes = decode_base64(key_base64) verify_key = decode_verify_key_bytes(key_id, key_bytes) - verify_keys[key_id] = verify_key + verify_keys[key_id] = FetchKeyResult( + verify_key=verify_key, valid_until_ts=key_data["expired_ts"] + ) # re-sign the json with our own key, so that it is ready if we are asked to # give it out as a notary server @@ -623,7 +643,6 @@ class Keyring(object): ) signed_key_json_bytes = encode_canonical_json(signed_key_json) - ts_valid_until_ms = signed_key_json[u"valid_until_ts"] # for reasons I don't quite understand, we store this json for the key ids we # requested, as well as those we got. diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 3c5f52009b..5300720dbb 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -19,6 +19,7 @@ import logging import six +import attr from signedjson.key import decode_verify_key_bytes from synapse.util import batch_iter @@ -36,6 +37,12 @@ else: db_binary_type = memoryview +@attr.s(slots=True, frozen=True) +class FetchKeyResult(object): + verify_key = attr.ib() # VerifyKey: the key itself + valid_until_ts = attr.ib() # int: how long we can use this key for + + class KeyStore(SQLBaseStore): """Persistence for signature verification keys """ @@ -54,8 +61,8 @@ class KeyStore(SQLBaseStore): iterable of (server_name, key-id) tuples to fetch keys for Returns: - Deferred: resolves to dict[Tuple[str, str], VerifyKey|None]: - map from (server_name, key_id) -> VerifyKey, or None if the key is + Deferred: resolves to dict[Tuple[str, str], FetchKeyResult|None]: + map from (server_name, key_id) -> FetchKeyResult, or None if the key is unknown """ keys = {} @@ -65,17 +72,19 @@ class KeyStore(SQLBaseStore): # batch_iter always returns tuples so it's safe to do len(batch) sql = ( - "SELECT server_name, key_id, verify_key FROM server_signature_keys " - "WHERE 1=0" + "SELECT server_name, key_id, verify_key, ts_valid_until_ms " + "FROM server_signature_keys WHERE 1=0" ) + " OR (server_name=? AND key_id=?)" * len(batch) txn.execute(sql, tuple(itertools.chain.from_iterable(batch))) for row in txn: - server_name, key_id, key_bytes = row - keys[(server_name, key_id)] = decode_verify_key_bytes( - key_id, bytes(key_bytes) + server_name, key_id, key_bytes, ts_valid_until_ms = row + res = FetchKeyResult( + verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)), + valid_until_ts=ts_valid_until_ms, ) + keys[(server_name, key_id)] = res def _txn(txn): for batch in batch_iter(server_name_and_key_ids, 50): @@ -89,20 +98,21 @@ class KeyStore(SQLBaseStore): Args: from_server (str): Where the verification keys were looked up ts_added_ms (int): The time to record that the key was added - verify_keys (iterable[tuple[str, str, nacl.signing.VerifyKey]]): + verify_keys (iterable[tuple[str, str, FetchKeyResult]]): keys to be stored. Each entry is a triplet of (server_name, key_id, key). """ key_values = [] value_values = [] invalidations = [] - for server_name, key_id, verify_key in verify_keys: + for server_name, key_id, fetch_result in verify_keys: key_values.append((server_name, key_id)) value_values.append( ( from_server, ts_added_ms, - db_binary_type(verify_key.encode()), + fetch_result.valid_until_ts, + db_binary_type(fetch_result.verify_key.encode()), ) ) # invalidate takes a tuple corresponding to the params of @@ -125,6 +135,7 @@ class KeyStore(SQLBaseStore): value_names=( "from_server", "ts_added_ms", + "ts_valid_until_ms", "verify_key", ), value_values=value_values, diff --git a/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql b/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql new file mode 100644 index 0000000000..c01aa9d2d9 --- /dev/null +++ b/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql @@ -0,0 +1,23 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* When we can use this key until, before we have to refresh it. */ +ALTER TABLE server_signature_keys ADD COLUMN ts_valid_until_ms BIGINT; + +UPDATE server_signature_keys SET ts_valid_until_ms = ( + SELECT MAX(ts_valid_until_ms) FROM server_keys_json skj WHERE + skj.server_name = server_signature_keys.server_name AND + skj.key_id = server_signature_keys.key_id +); diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index bcffe53a91..83de32b05d 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -25,6 +25,7 @@ from twisted.internet import defer from synapse.api.errors import SynapseError from synapse.crypto import keyring from synapse.crypto.keyring import KeyLookupError +from synapse.storage.keys import FetchKeyResult from synapse.util import logcontext from synapse.util.logcontext import LoggingContext @@ -201,7 +202,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): ( "server9", key1_id, - signedjson.key.get_verify_key(key1), + FetchKeyResult(signedjson.key.get_verify_key(key1), 1000), ), ], ) @@ -251,9 +252,10 @@ class KeyringTestCase(unittest.HomeserverTestCase): server_name_and_key_ids = [(SERVER_NAME, ("key1",))] keys = self.get_success(kr.get_keys_from_server(server_name_and_key_ids)) k = keys[SERVER_NAME][testverifykey_id] - self.assertEqual(k, testverifykey) - self.assertEqual(k.alg, "ed25519") - self.assertEqual(k.version, "ver1") + self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS) + self.assertEqual(k.verify_key, testverifykey) + self.assertEqual(k.verify_key.alg, "ed25519") + self.assertEqual(k.verify_key.version, "ver1") # check that the perspectives store is correctly updated lookup_triplet = (SERVER_NAME, testverifykey_id, None) @@ -321,9 +323,10 @@ class KeyringTestCase(unittest.HomeserverTestCase): keys = self.get_success(kr.get_keys_from_perspectives(server_name_and_key_ids)) self.assertIn(SERVER_NAME, keys) k = keys[SERVER_NAME][testverifykey_id] - self.assertEqual(k, testverifykey) - self.assertEqual(k.alg, "ed25519") - self.assertEqual(k.version, "ver1") + self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS) + self.assertEqual(k.verify_key, testverifykey) + self.assertEqual(k.verify_key.alg, "ed25519") + self.assertEqual(k.verify_key.version, "ver1") # check that the perspectives store is correctly updated lookup_triplet = (SERVER_NAME, testverifykey_id, None) @@ -346,7 +349,10 @@ class KeyringTestCase(unittest.HomeserverTestCase): @defer.inlineCallbacks def run_in_context(f, *args, **kwargs): - with LoggingContext("testctx"): + with LoggingContext("testctx") as ctx: + # we set the "request" prop to make it easier to follow what's going on in the + # logs. + ctx.request = "testctx" rv = yield f(*args, **kwargs) defer.returnValue(rv) diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py index 71ad7aee32..e07ff01201 100644 --- a/tests/storage/test_keys.py +++ b/tests/storage/test_keys.py @@ -17,6 +17,8 @@ import signedjson.key from twisted.internet.defer import Deferred +from synapse.storage.keys import FetchKeyResult + import tests.unittest KEY_1 = signedjson.key.decode_verify_key_base64( @@ -37,8 +39,8 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): "from_server", 10, [ - ("server1", key_id_1, KEY_1), - ("server1", key_id_2, KEY_2), + ("server1", key_id_1, FetchKeyResult(KEY_1, 100)), + ("server1", key_id_2, FetchKeyResult(KEY_2, 200)), ], ) self.get_success(d) @@ -50,13 +52,15 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): self.assertEqual(len(res.keys()), 3) res1 = res[("server1", key_id_1)] - self.assertEqual(res1, KEY_1) - self.assertEqual(res1.version, "key1") + self.assertEqual(res1.verify_key, KEY_1) + self.assertEqual(res1.verify_key.version, "key1") + self.assertEqual(res1.valid_until_ts, 100) res2 = res[("server1", key_id_2)] - self.assertEqual(res2, KEY_2) + self.assertEqual(res2.verify_key, KEY_2) # version comes from the ID it was stored with - self.assertEqual(res2.version, "KEY_ID_2") + self.assertEqual(res2.verify_key.version, "KEY_ID_2") + self.assertEqual(res2.valid_until_ts, 200) # non-existent result gives None self.assertIsNone(res[("server1", "ed25519:key3")]) @@ -73,8 +77,8 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): "from_server", 0, [ - ("srv1", key_id_1, KEY_1), - ("srv1", key_id_2, KEY_2), + ("srv1", key_id_1, FetchKeyResult(KEY_1, 100)), + ("srv1", key_id_2, FetchKeyResult(KEY_2, 200)), ], ) self.get_success(d) @@ -82,26 +86,38 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase): d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)]) res = self.get_success(d) self.assertEqual(len(res.keys()), 2) - self.assertEqual(res[("srv1", key_id_1)], KEY_1) - self.assertEqual(res[("srv1", key_id_2)], KEY_2) + + res1 = res[("srv1", key_id_1)] + self.assertEqual(res1.verify_key, KEY_1) + self.assertEqual(res1.valid_until_ts, 100) + + res2 = res[("srv1", key_id_2)] + self.assertEqual(res2.verify_key, KEY_2) + self.assertEqual(res2.valid_until_ts, 200) # we should be able to look up the same thing again without a db hit res = store.get_server_verify_keys([("srv1", key_id_1)]) if isinstance(res, Deferred): res = self.successResultOf(res) self.assertEqual(len(res.keys()), 1) - self.assertEqual(res[("srv1", key_id_1)], KEY_1) + self.assertEqual(res[("srv1", key_id_1)].verify_key, KEY_1) new_key_2 = signedjson.key.get_verify_key( signedjson.key.generate_signing_key("key2") ) d = store.store_server_verify_keys( - "from_server", 10, [("srv1", key_id_2, new_key_2)] + "from_server", 10, [("srv1", key_id_2, FetchKeyResult(new_key_2, 300))] ) self.get_success(d) d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)]) res = self.get_success(d) self.assertEqual(len(res.keys()), 2) - self.assertEqual(res[("srv1", key_id_1)], KEY_1) - self.assertEqual(res[("srv1", key_id_2)], new_key_2) + + res1 = res[("srv1", key_id_1)] + self.assertEqual(res1.verify_key, KEY_1) + self.assertEqual(res1.valid_until_ts, 100) + + res2 = res[("srv1", key_id_2)] + self.assertEqual(res2.verify_key, new_key_2) + self.assertEqual(res2.valid_until_ts, 300) -- cgit 1.5.1 From bc4b2ecf70bc3965cbbf1daee52bf7577e219d7b Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 25 May 2019 12:02:48 -0600 Subject: Fix logging for room stats background update --- synapse/storage/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 71b80a891d..eb0ced5b5e 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -169,7 +169,7 @@ class StatsStore(StateDeltasStore): logger.info( "Processing the next %d rooms of %d remaining", - (len(rooms_to_work_on), progress["remaining"]), + len(rooms_to_work_on), progress["remaining"], ) # Number of state events we've processed by going through each room -- cgit 1.5.1 From ba17de7fbc29700163b23363ae0e03f8a01ef274 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 28 May 2019 10:11:38 +0100 Subject: Fix schema update for account validity --- .../storage/schema/delta/54/account_validity.sql | 27 ------------------- .../delta/54/account_validity_with_renewal.sql | 30 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 27 deletions(-) delete mode 100644 synapse/storage/schema/delta/54/account_validity.sql create mode 100644 synapse/storage/schema/delta/54/account_validity_with_renewal.sql (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/54/account_validity.sql b/synapse/storage/schema/delta/54/account_validity.sql deleted file mode 100644 index 2357626000..0000000000 --- a/synapse/storage/schema/delta/54/account_validity.sql +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2019 New Vector Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -DROP TABLE IF EXISTS account_validity; - --- Track what users are in public rooms. -CREATE TABLE IF NOT EXISTS account_validity ( - user_id TEXT PRIMARY KEY, - expiration_ts_ms BIGINT NOT NULL, - email_sent BOOLEAN NOT NULL, - renewal_token TEXT -); - -CREATE INDEX account_validity_email_sent_idx ON account_validity(email_sent, expiration_ts_ms) -CREATE UNIQUE INDEX account_validity_renewal_string_idx ON account_validity(renewal_token) diff --git a/synapse/storage/schema/delta/54/account_validity_with_renewal.sql b/synapse/storage/schema/delta/54/account_validity_with_renewal.sql new file mode 100644 index 0000000000..0adb2ad55e --- /dev/null +++ b/synapse/storage/schema/delta/54/account_validity_with_renewal.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We previously changed the schema for this table without renaming the file, which means +-- that some databases might still be using the old schema. This ensures Synapse uses the +-- right schema for the table. +DROP TABLE IF EXISTS account_validity; + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS account_validity ( + user_id TEXT PRIMARY KEY, + expiration_ts_ms BIGINT NOT NULL, + email_sent BOOLEAN NOT NULL, + renewal_token TEXT +); + +CREATE INDEX account_validity_email_sent_idx ON account_validity(email_sent, expiration_ts_ms) +CREATE UNIQUE INDEX account_validity_renewal_string_idx ON account_validity(renewal_token) -- cgit 1.5.1 From 52839886d664576831462e033b88e5aba4c019e3 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 28 May 2019 16:47:42 +0100 Subject: Allow configuring a range for the account validity startup job When enabling the account validity feature, Synapse will look at startup for registered account without an expiration date, and will set one equals to 'now + validity_period' for them. On large servers, it can mean that a large number of users will have the same expiration date, which means that they will all be sent a renewal email at the same time, which isn't ideal. In order to mitigate this, this PR allows server admins to define a 'max_delta' so that the expiration date is a random value in the [now + validity_period ; now + validity_period + max_delta] range. This allows renewal emails to be progressively sent over a configured period instead of being sent all in one big batch. --- synapse/config/registration.py | 11 +++++++++++ synapse/storage/_base.py | 23 +++++++++++++++++++++-- tests/rest/client/v2_alpha/test_register.py | 21 +++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 693288f938..b4fd4af368 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -39,6 +39,10 @@ class AccountValidityConfig(Config): else: self.renew_email_subject = "Renew your %(app)s account" + self.startup_job_max_delta = self.parse_duration( + config.get("startup_job_max_delta", 0), + ) + if self.renew_by_email_enabled and "public_baseurl" not in synapse_config: raise ConfigError("Can't send renewal emails without 'public_baseurl'") @@ -131,11 +135,18 @@ class RegistrationConfig(Config): # after that the validity period changes and Synapse is restarted, the users' # expiration dates won't be updated unless their account is manually renewed. # + # If set, the ``startup_job_max_delta`` optional setting will make the startup job + # described above set a random expiration date between t + period and + # t + period + startup_job_max_delta, t being the date and time at which the job + # sets the expiration date for a given user. This is useful for server admins that + # want to avoid Synapse sending a lot of renewal emails at once. + # #account_validity: # enabled: True # period: 6w # renew_at: 1w # renew_email_subject: "Renew your %%(app)s account" + # startup_job_max_delta: 2d # The user must provide all of the below types of 3PID when registering. # diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index fa6839ceca..40802fd3dc 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -16,6 +16,7 @@ # limitations under the License. import itertools import logging +import random import sys import threading import time @@ -247,6 +248,8 @@ class SQLBaseStore(object): self._check_safe_to_upsert, ) + self.rand = random.SystemRandom() + if self._account_validity.enabled: self._clock.call_later( 0.0, @@ -308,21 +311,37 @@ class SQLBaseStore(object): res = self.cursor_to_dict(txn) if res: for user in res: - self.set_expiration_date_for_user_txn(txn, user["name"]) + self.set_expiration_date_for_user_txn( + txn, + user["name"], + use_delta=True, + ) yield self.runInteraction( "get_users_with_no_expiration_date", select_users_with_no_expiration_date_txn, ) - def set_expiration_date_for_user_txn(self, txn, user_id): + def set_expiration_date_for_user_txn(self, txn, user_id, use_delta=False): """Sets an expiration date to the account with the given user ID. Args: user_id (str): User ID to set an expiration date for. + use_delta (bool): If set to False, the expiration date for the user will be + now + validity period. If set to True, this expiration date will be a + random value in the [now + period; now + period + max_delta] range, + max_delta being the configured value for the size of the range, unless + delta is 0, in which case it sets it to now + period. """ now_ms = self._clock.time_msec() expiration_ts = now_ms + self._account_validity.period + + if use_delta and self._account_validity.startup_job_max_delta: + expiration_ts = self.rand.randrange( + expiration_ts, + expiration_ts + self._account_validity.startup_job_max_delta, + ) + self._simple_insert_txn( txn, "account_validity", diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index d4a1d4d50c..7603440fd8 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -436,6 +436,7 @@ class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): self.validity_period = 10 + self.max_delta = 10 config = self.default_config() @@ -459,8 +460,28 @@ class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): """ user_id = self.register_user("kermit", "user") + self.hs.config.account_validity.startup_job_max_delta = 0 + now_ms = self.hs.clock.time_msec() self.get_success(self.store._set_expiration_date_when_missing()) res = self.get_success(self.store.get_expiration_ts_for_user(user_id)) self.assertEqual(res, now_ms + self.validity_period) + + def test_background_job_with_max_delta(self): + """ + Tests the same thing as test_background_job, except that it sets the + startup_job_max_delta parameter and checks that the expiration date is within the + allowed range. + """ + user_id = self.register_user("kermit_delta", "user") + + self.hs.config.account_validity.startup_job_max_delta = self.max_delta + + now_ms = self.hs.clock.time_msec() + self.get_success(self.store._set_expiration_date_when_missing()) + + res = self.get_success(self.store.get_expiration_ts_for_user(user_id)) + + self.assertLessEqual(res, now_ms + self.validity_period + self.delta) + self.assertGreaterEqual(res, now_ms + self.validity_period) -- cgit 1.5.1 From 58c8ed5b0dbfe0556f11985a61c0e13bbe61d93c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 11:56:24 +0100 Subject: Correctly filter out extremities with soft failed prevs (#5274) When we receive a soft failed event we, correctly, *do not* update the forward extremity table with the event. However, if we later receive an event that references the soft failed event we then need to remove the soft failed events prev events from the forward extremities table, otherwise we just build up forward extremities. Fixes #5269 --- changelog.d/5274.bugfix | 1 + synapse/storage/events.py | 82 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5274.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix new file mode 100644 index 0000000000..9e14d20289 --- /dev/null +++ b/changelog.d/5274.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2ffc27ff41..6e9f3d1dc0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -554,10 +554,18 @@ class EventsStore( e_id for event in new_events for e_id in event.prev_event_ids() ) - # Finally, remove any events which are prev_events of any existing events. + # Remove any events which are prev_events of any existing events. existing_prevs = yield self._get_events_which_are_prevs(result) result.difference_update(existing_prevs) + # Finally handle the case where the new events have soft-failed prev + # events. If they do we need to remove them and their prev events, + # otherwise we end up with dangling extremities. + existing_prevs = yield self._get_prevs_before_rejected( + e_id for event in new_events for e_id in event.prev_event_ids() + ) + result.difference_update(existing_prevs) + defer.returnValue(result) @defer.inlineCallbacks @@ -573,7 +581,7 @@ class EventsStore( """ results = [] - def _get_events(txn, batch): + def _get_events_which_are_prevs_txn(txn, batch): sql = """ SELECT prev_event_id, internal_metadata FROM event_edges @@ -596,10 +604,78 @@ class EventsStore( ) for chunk in batch_iter(event_ids, 100): - yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk) + yield self.runInteraction( + "_get_events_which_are_prevs", + _get_events_which_are_prevs_txn, + chunk, + ) defer.returnValue(results) + @defer.inlineCallbacks + def _get_prevs_before_rejected(self, event_ids): + """Get soft-failed ancestors to remove from the extremities. + + Given a set of events, find all those that have been soft-failed or + rejected. Returns those soft failed/rejected events and their prev + events (whether soft-failed/rejected or not), and recurses up the + prev-event graph until it finds no more soft-failed/rejected events. + + This is used to find extremities that are ancestors of new events, but + are separated by soft failed events. + + Args: + event_ids (Iterable[str]): Events to find prev events for. Note + that these must have already been persisted. + + Returns: + Deferred[set[str]] + """ + + # The set of event_ids to return. This includes all soft-failed events + # and their prev events. + existing_prevs = set() + + def _get_prevs_before_rejected_txn(txn, batch): + to_recursively_check = batch + + while to_recursively_check: + sql = """ + SELECT + event_id, prev_event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + LEFT JOIN rejections USING (event_id) + LEFT JOIN event_json USING (event_id) + WHERE + event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_recursively_check), + ) + + txn.execute(sql, to_recursively_check) + to_recursively_check = [] + + for event_id, prev_event_id, metadata, rejected in txn: + if prev_event_id in existing_prevs: + continue + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + to_recursively_check.append(prev_event_id) + existing_prevs.add(prev_event_id) + + for chunk in batch_iter(event_ids, 100): + yield self.runInteraction( + "_get_prevs_before_rejected", + _get_prevs_before_rejected_txn, + chunk, + ) + + defer.returnValue(existing_prevs) + @defer.inlineCallbacks def _get_new_state_after_events( self, room_id, events_context, old_latest_event_ids, new_latest_event_ids -- cgit 1.5.1 From d79c9994f416ee5dab27a277fa729ffa5ee74ccc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 18:52:41 +0100 Subject: Add DB bg update to cleanup extremities. Due to #5269 we may have extremities in our DB that we shouldn't have, so lets add a cleanup task such to remove those. --- synapse/storage/events.py | 186 +++++++++++++++++++++ .../schema/delta/54/delete_forward_extremities.sql | 19 +++ 2 files changed, 205 insertions(+) create mode 100644 synapse/storage/schema/delta/54/delete_forward_extremities.sql (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6e9f3d1dc0..a9be143bd5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -221,6 +221,7 @@ class EventsStore( ): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" + EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) @@ -252,6 +253,11 @@ class EventsStore( psql_only=True, ) + self.register_background_update_handler( + self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self._cleanup_extremities_bg_update, + ) + self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() @@ -2341,6 +2347,186 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) + @defer.inlineCallbacks + def _cleanup_extremities_bg_update(self, progress, batch_size): + """Background update to clean out extremities that should have been + deleted previously. + + Mainly used to deal with the aftermath of #5269. + """ + + # This works by first copying all existing forward extremities into the + # `_extremities_to_check` table at start up, and then checking each + # event in that table whether we have any descendants that are not + # soft-failed/rejected. If that is the case then we delete that event + # from the forward extremities table. + # + # For efficiency, we do this in batches by recursively pulling out all + # descendants of a batch until we find the non soft-failed/rejected + # events, i.e. the set of descendants whose chain of prev events back + # to the batch of extremities are all soft-failed or rejected. + # Typically, we won't find any such events as extremities will rarely + # have any descendants, but if they do then we should delete those + # extremities. + + def _cleanup_extremities_bg_update_txn(txn): + # The set of extremity event IDs that we're checking this round + original_set = set() + + # A dict[str, set[str]] of event ID to their prev events. + graph = {} + + # The set of descendants of the original set that are not rejected + # nor soft-failed. Ancestors of these events should be removed + # from the forward extremities table. + non_rejected_leaves = set() + + # Set of event IDs that have been soft failed, and for which we + # should check if they have descendants which haven't been soft + # failed. + soft_failed_events_to_lookup = set() + + # First, we get `batch_size` events from the table, pulling out + # their prev events, if any, and their prev events rejection status. + txn.execute( + """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL, events.outlier + FROM ( + SELECT event_id AS prev_event_id + FROM _extremities_to_check + LIMIT ? + ) AS f + LEFT JOIN event_edges USING (prev_event_id) + LEFT JOIN events USING (event_id) + LEFT JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + """, (batch_size,) + ) + + for prev_event_id, event_id, metadata, rejected, outlier in txn: + original_set.add(prev_event_id) + + if not event_id or outlier: + # Common case where the forward extremity doesn't have any + # descendants. + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = False + if metadata: + soft_failed = json.loads(metadata).get("soft_failed") + + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # Now we recursively check all the soft-failed descendants we + # found above in the same way, until we have nothing left to + # check. + while soft_failed_events_to_lookup: + # We only want to do 100 at a time, so we split given list + # into two. + batch = list(soft_failed_events_to_lookup) + to_check, to_defer = batch[:100], batch[100:] + soft_failed_events_to_lookup = set(to_defer) + + sql = """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + INNER JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + WHERE + prev_event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_check), + ) + txn.execute(sql, to_check) + + for prev_event_id, event_id, metadata, rejected in txn: + if event_id in graph: + # Already handled this event previously, but we still + # want to record the edge. + graph.setdefault(event_id, set()).add(prev_event_id) + logger.info("Already handled") + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # We have a set of non-soft-failed descendants, so we recurse up + # the graph to find all ancestors and add them to the set of event + # IDs that we can delete from forward extremities table. + to_delete = set() + while non_rejected_leaves: + event_id = non_rejected_leaves.pop() + prev_event_ids = graph.get(event_id, set()) + non_rejected_leaves.update(prev_event_ids) + to_delete.update(prev_event_ids) + + to_delete.intersection_update(original_set) + + logger.info("Deleting up to %d forward extremities", len(to_delete)) + + self._simple_delete_many_txn( + txn=txn, + table="event_forward_extremities", + column="event_id", + iterable=to_delete, + keyvalues={}, + ) + + if to_delete: + # We now need to invalidate the caches of these rooms + rows = self._simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",) + ) + for row in rows: + txn.call_after( + self.get_latest_event_ids_in_room.invalidate, + (row["room_id"],) + ) + + self._simple_delete_many_txn( + txn=txn, + table="_extremities_to_check", + column="event_id", + iterable=original_set, + keyvalues={}, + ) + + return len(original_set) + + num_handled = yield self.runInteraction( + "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, + ) + + if not num_handled: + yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + + def _drop_table_txn(txn): + txn.execute("DROP TABLE _extremities_to_check") + + yield self.runInteraction( + "_cleanup_extremities_bg_update_drop_table", + _drop_table_txn, + ) + + defer.returnValue(num_handled) + AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql new file mode 100644 index 0000000000..7056bd1d00 --- /dev/null +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -0,0 +1,19 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('delete_soft_failed_extremities', '{}'); + +CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; -- cgit 1.5.1 From 7e8e683754cdc606a1440832d9b1eb47f930ddee Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 11:58:32 +0100 Subject: Log actual number of entries deleted --- synapse/storage/_base.py | 12 +++++++++--- synapse/storage/events.py | 6 ++++-- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index fa6839ceca..3fe827cd43 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1261,7 +1261,8 @@ class SQLBaseStore(object): " AND ".join("%s = ?" % (k,) for k in keyvalues), ) - return txn.execute(sql, list(keyvalues.values())) + txn.execute(sql, list(keyvalues.values())) + return txn.rowcount def _simple_delete_many(self, table, column, iterable, keyvalues, desc): return self.runInteraction( @@ -1280,9 +1281,12 @@ class SQLBaseStore(object): column : column name to test for inclusion against `iterable` iterable : list keyvalues : dict of column names and values to select the rows with + + Returns: + int: Number rows deleted """ if not iterable: - return + return 0 sql = "DELETE FROM %s" % table @@ -1297,7 +1301,9 @@ class SQLBaseStore(object): if clauses: sql = "%s WHERE %s" % (sql, " AND ".join(clauses)) - return txn.execute(sql, values) + txn.execute(sql, values) + + return txn.rowcount def _get_cache_dict( self, db_conn, table, entity_column, stream_column, max_value, limit=100000 diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a9be143bd5..a9664928ca 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2476,7 +2476,7 @@ class EventsStore( logger.info("Deleting up to %d forward extremities", len(to_delete)) - self._simple_delete_many_txn( + deleted = self._simple_delete_many_txn( txn=txn, table="event_forward_extremities", column="event_id", @@ -2484,7 +2484,9 @@ class EventsStore( keyvalues={}, ) - if to_delete: + logger.info("Deleted %d forward extremities", deleted) + + if deleted: # We now need to invalidate the caches of these rooms rows = self._simple_select_many_txn( txn, -- cgit 1.5.1 From 46c8f7a5170d04dfa6ad02c69667d4aa48635231 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 30 May 2019 01:47:16 +1000 Subject: Implement the SHHS complexity API (#5216) --- changelog.d/5216.misc | 1 + synapse/api/urls.py | 1 + synapse/federation/transport/server.py | 31 +++++++++++- synapse/rest/admin/__init__.py | 12 +++-- synapse/storage/events_worker.py | 50 ++++++++++++++++++- tests/federation/test_complexity.py | 90 ++++++++++++++++++++++++++++++++++ 6 files changed, 180 insertions(+), 5 deletions(-) create mode 100644 changelog.d/5216.misc create mode 100644 tests/federation/test_complexity.py (limited to 'synapse/storage') diff --git a/changelog.d/5216.misc b/changelog.d/5216.misc new file mode 100644 index 0000000000..dbfa29475f --- /dev/null +++ b/changelog.d/5216.misc @@ -0,0 +1 @@ +Synapse will now serve the experimental "room complexity" API endpoint. diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 3c6bddff7a..e16c386a14 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -26,6 +26,7 @@ CLIENT_API_PREFIX = "/_matrix/client" FEDERATION_PREFIX = "/_matrix/federation" FEDERATION_V1_PREFIX = FEDERATION_PREFIX + "/v1" FEDERATION_V2_PREFIX = FEDERATION_PREFIX + "/v2" +FEDERATION_UNSTABLE_PREFIX = FEDERATION_PREFIX + "/unstable" STATIC_PREFIX = "/_matrix/static" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 385eda2dca..d0efc4e0d3 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -23,7 +23,11 @@ from twisted.internet import defer import synapse from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.room_versions import RoomVersions -from synapse.api.urls import FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX +from synapse.api.urls import ( + FEDERATION_UNSTABLE_PREFIX, + FEDERATION_V1_PREFIX, + FEDERATION_V2_PREFIX, +) from synapse.http.endpoint import parse_and_validate_server_name from synapse.http.server import JsonResource from synapse.http.servlet import ( @@ -1304,6 +1308,30 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): defer.returnValue((200, new_content)) +class RoomComplexityServlet(BaseFederationServlet): + """ + Indicates to other servers how complex (and therefore likely + resource-intensive) a public room this server knows about is. + """ + PATH = "/rooms/(?P[^/]*)/complexity" + PREFIX = FEDERATION_UNSTABLE_PREFIX + + @defer.inlineCallbacks + def on_GET(self, origin, content, query, room_id): + + store = self.handler.hs.get_datastore() + + is_public = yield store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if not is_public: + raise SynapseError(404, "Room not found", errcode=Codes.INVALID_PARAM) + + complexity = yield store.get_room_complexity(room_id) + defer.returnValue((200, complexity)) + + FEDERATION_SERVLET_CLASSES = ( FederationSendServlet, FederationEventServlet, @@ -1327,6 +1355,7 @@ FEDERATION_SERVLET_CLASSES = ( FederationThirdPartyInviteExchangeServlet, On3pidBindServlet, FederationVersionServlet, + RoomComplexityServlet, ) OPENID_SERVLET_CLASSES = ( diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 744d85594f..d6c4dcdb18 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -822,10 +822,16 @@ class AdminRestResource(JsonResource): def __init__(self, hs): JsonResource.__init__(self, hs, canonical_json=False) + register_servlets(hs, self) - register_servlets_for_client_rest_resource(hs, self) - SendServerNoticeServlet(hs).register(self) - VersionServlet(hs).register(self) + +def register_servlets(hs, http_server): + """ + Register all the admin servlets. + """ + register_servlets_for_client_rest_resource(hs, http_server) + SendServerNoticeServlet(hs).register(http_server) + VersionServlet(hs).register(http_server) def register_servlets_for_client_rest_resource(hs, http_server): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 21b353cad3..b56c83e460 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import division + import itertools import logging from collections import namedtuple @@ -614,7 +616,7 @@ class EventsWorkerStore(SQLBaseStore): def _get_total_state_event_counts_txn(self, txn, room_id): """ - See get_state_event_counts. + See get_total_state_event_counts. """ sql = "SELECT COUNT(*) FROM state_events WHERE room_id=?" txn.execute(sql, (room_id,)) @@ -635,3 +637,49 @@ class EventsWorkerStore(SQLBaseStore): "get_total_state_event_counts", self._get_total_state_event_counts_txn, room_id ) + + def _get_current_state_event_counts_txn(self, txn, room_id): + """ + See get_current_state_event_counts. + """ + sql = "SELECT COUNT(*) FROM current_state_events WHERE room_id=?" + txn.execute(sql, (room_id,)) + row = txn.fetchone() + return row[0] if row else 0 + + def get_current_state_event_counts(self, room_id): + """ + Gets the current number of state events in a room. + + Args: + room_id (str) + + Returns: + Deferred[int] + """ + return self.runInteraction( + "get_current_state_event_counts", + self._get_current_state_event_counts_txn, room_id + ) + + @defer.inlineCallbacks + def get_room_complexity(self, room_id): + """ + Get a rough approximation of the complexity of the room. This is used by + remote servers to decide whether they wish to join the room or not. + Higher complexity value indicates that being in the room will consume + more resources. + + Args: + room_id (str) + + Returns: + Deferred[dict[str:int]] of complexity version to complexity. + """ + state_events = yield self.get_current_state_event_counts(room_id) + + # Call this one "v1", so we can introduce new ones as we want to develop + # it. + complexity_v1 = round(state_events / 500, 2) + + defer.returnValue({"v1": complexity_v1}) diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py new file mode 100644 index 0000000000..1e3e5aec66 --- /dev/null +++ b/tests/federation/test_complexity.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 Matrix.org Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.config.ratelimiting import FederationRateLimitConfig +from synapse.federation.transport import server +from synapse.rest import admin +from synapse.rest.client.v1 import login, room +from synapse.util.ratelimitutils import FederationRateLimiter + +from tests import unittest + + +class RoomComplexityTests(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def default_config(self, name='test'): + config = super(RoomComplexityTests, self).default_config(name=name) + config["limit_large_remote_room_joins"] = True + config["limit_large_remote_room_complexity"] = 0.05 + return config + + def prepare(self, reactor, clock, homeserver): + class Authenticator(object): + def authenticate_request(self, request, content): + return defer.succeed("otherserver.nottld") + + ratelimiter = FederationRateLimiter( + clock, + FederationRateLimitConfig( + window_size=1, + sleep_limit=1, + sleep_msec=1, + reject_limit=1000, + concurrent_requests=1000, + ), + ) + server.register_servlets( + homeserver, self.resource, Authenticator(), ratelimiter + ) + + def test_complexity_simple(self): + + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + self.helper.send_state( + room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token + ) + + # Get the room complexity + request, channel = self.make_request( + "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) + ) + self.render(request) + self.assertEquals(200, channel.code) + complexity = channel.json_body["v1"] + self.assertTrue(complexity > 0, complexity) + + # Artificially raise the complexity + store = self.hs.get_datastore() + store.get_current_state_event_counts = lambda x: defer.succeed(500 * 1.23) + + # Get the room complexity again -- make sure it's our artificial value + request, channel = self.make_request( + "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) + ) + self.render(request) + self.assertEquals(200, channel.code) + complexity = channel.json_body["v1"] + self.assertEqual(complexity, 1.23) -- cgit 1.5.1 From 640fcbb07f8dc7d89465734f009d8e0a458c2b17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 10:55:55 +0100 Subject: Fixup comments and logging --- synapse/storage/events.py | 21 ++++++++++++--------- .../schema/delta/54/delete_forward_extremities.sql | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a9664928ca..418d88b8dc 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2387,7 +2387,8 @@ class EventsStore( soft_failed_events_to_lookup = set() # First, we get `batch_size` events from the table, pulling out - # their prev events, if any, and their prev events rejection status. + # their successor events, if any, and their successor events + # rejection status. txn.execute( """SELECT prev_event_id, event_id, internal_metadata, rejections.event_id IS NOT NULL, events.outlier @@ -2450,11 +2451,10 @@ class EventsStore( if event_id in graph: # Already handled this event previously, but we still # want to record the edge. - graph.setdefault(event_id, set()).add(prev_event_id) - logger.info("Already handled") + graph[event_id].add(prev_event_id) continue - graph.setdefault(event_id, set()).add(prev_event_id) + graph[event_id] = {prev_event_id} soft_failed = json.loads(metadata).get("soft_failed") if soft_failed or rejected: @@ -2474,8 +2474,6 @@ class EventsStore( to_delete.intersection_update(original_set) - logger.info("Deleting up to %d forward extremities", len(to_delete)) - deleted = self._simple_delete_many_txn( txn=txn, table="event_forward_extremities", @@ -2484,7 +2482,11 @@ class EventsStore( keyvalues={}, ) - logger.info("Deleted %d forward extremities", deleted) + logger.info( + "Deleted %d forward extremities of %d checked, to clean up #5269", + deleted, + len(original_set), + ) if deleted: # We now need to invalidate the caches of these rooms @@ -2496,10 +2498,11 @@ class EventsStore( keyvalues={}, retcols=("room_id",) ) - for row in rows: + room_ids = set(row["room_id"] for row in rows) + for room_id in room_ids: txn.call_after( self.get_latest_event_ids_in_room.invalidate, - (row["room_id"],) + (room_id,) ) self._simple_delete_many_txn( diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index 7056bd1d00..aa40f13da7 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -13,7 +13,10 @@ * limitations under the License. */ +-- Start a background job to cleanup extremities that were incorrectly added +-- by bug #5269. INSERT INTO background_updates (update_name, progress_json) VALUES ('delete_soft_failed_extremities', '{}'); +DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; -- cgit 1.5.1 From 5c1ece0ffcd803eb4bf8e5748d3e2633426e00a0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 11:22:59 +0100 Subject: Move event background updates to a separate file --- synapse/storage/__init__.py | 2 + synapse/storage/events.py | 371 +------------------------------- synapse/storage/events_bg_updates.py | 401 +++++++++++++++++++++++++++++++++++ 3 files changed, 405 insertions(+), 369 deletions(-) create mode 100644 synapse/storage/events_bg_updates.py (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 66675d08ae..71316f7d09 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -36,6 +36,7 @@ from .engines import PostgresEngine from .event_federation import EventFederationStore from .event_push_actions import EventPushActionsStore from .events import EventsStore +from .events_bg_updates import EventsBackgroundUpdatesStore from .filtering import FilteringStore from .group_server import GroupServerStore from .keys import KeyStore @@ -66,6 +67,7 @@ logger = logging.getLogger(__name__) class DataStore( + EventsBackgroundUpdatesStore, RoomMemberStore, RoomStore, RegistrationStore, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 418d88b8dc..f9162be9b9 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018-2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -219,47 +220,11 @@ class EventsStore( EventsWorkerStore, BackgroundUpdateStore, ): - EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" - EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" - EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts - ) - self.register_background_update_handler( - self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, - self._background_reindex_fields_sender, - ) - - self.register_background_index_update( - "event_contains_url_index", - index_name="event_contains_url_index", - table="events", - columns=["room_id", "topological_ordering", "stream_ordering"], - where_clause="contains_url = true AND outlier = false", - ) - - # an event_id index on event_search is useful for the purge_history - # api. Plus it means we get to enforce some integrity with a UNIQUE - # clause - self.register_background_index_update( - "event_search_event_id_idx", - index_name="event_search_event_id_idx", - table="event_search", - columns=["event_id"], - unique=True, - psql_only=True, - ) - - self.register_background_update_handler( - self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, - self._cleanup_extremities_bg_update, - ) self._event_persist_queue = _EventPeristenceQueue() - self._state_resolution_handler = hs.get_state_resolution_handler() @defer.inlineCallbacks @@ -1585,153 +1550,6 @@ class EventsStore( ret = yield self.runInteraction("count_daily_active_rooms", _count) defer.returnValue(ret) - @defer.inlineCallbacks - def _background_reindex_fields_sender(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id_inclusive"] - max_stream_id = progress["max_stream_id_exclusive"] - rows_inserted = progress.get("rows_inserted", 0) - - INSERT_CLUMP_SIZE = 1000 - - def reindex_txn(txn): - sql = ( - "SELECT stream_ordering, event_id, json FROM events" - " INNER JOIN event_json USING (event_id)" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - - rows = txn.fetchall() - if not rows: - return 0 - - min_stream_id = rows[-1][0] - - update_rows = [] - for row in rows: - try: - event_id = row[1] - event_json = json.loads(row[2]) - sender = event_json["sender"] - content = event_json["content"] - - contains_url = "url" in content - if contains_url: - contains_url &= isinstance(content["url"], text_type) - except (KeyError, AttributeError): - # If the event is missing a necessary field then - # skip over it. - continue - - update_rows.append((sender, contains_url, event_id)) - - sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" - - for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): - clump = update_rows[index : index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - progress = { - "target_min_stream_id_inclusive": target_min_stream_id, - "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows), - } - - self._background_update_progress_txn( - txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress - ) - - return len(rows) - - result = yield self.runInteraction( - self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn - ) - - if not result: - yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME) - - defer.returnValue(result) - - @defer.inlineCallbacks - def _background_reindex_origin_server_ts(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id_inclusive"] - max_stream_id = progress["max_stream_id_exclusive"] - rows_inserted = progress.get("rows_inserted", 0) - - INSERT_CLUMP_SIZE = 1000 - - def reindex_search_txn(txn): - sql = ( - "SELECT stream_ordering, event_id FROM events" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - - rows = txn.fetchall() - if not rows: - return 0 - - min_stream_id = rows[-1][0] - event_ids = [row[1] for row in rows] - - rows_to_update = [] - - chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] - for chunk in chunks: - ev_rows = self._simple_select_many_txn( - txn, - table="event_json", - column="event_id", - iterable=chunk, - retcols=["event_id", "json"], - keyvalues={}, - ) - - for row in ev_rows: - event_id = row["event_id"] - event_json = json.loads(row["json"]) - try: - origin_server_ts = event_json["origin_server_ts"] - except (KeyError, AttributeError): - # If the event is missing a necessary field then - # skip over it. - continue - - rows_to_update.append((origin_server_ts, event_id)) - - sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" - - for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): - clump = rows_to_update[index : index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - progress = { - "target_min_stream_id_inclusive": target_min_stream_id, - "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows_to_update), - } - - self._background_update_progress_txn( - txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress - ) - - return len(rows_to_update) - - result = yield self.runInteraction( - self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn - ) - - if not result: - yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME) - - defer.returnValue(result) - def get_current_backfill_token(self): """The current minimum token that backfilled events have reached""" return -self._backfill_id_gen.get_current_token() @@ -2347,191 +2165,6 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) - @defer.inlineCallbacks - def _cleanup_extremities_bg_update(self, progress, batch_size): - """Background update to clean out extremities that should have been - deleted previously. - - Mainly used to deal with the aftermath of #5269. - """ - - # This works by first copying all existing forward extremities into the - # `_extremities_to_check` table at start up, and then checking each - # event in that table whether we have any descendants that are not - # soft-failed/rejected. If that is the case then we delete that event - # from the forward extremities table. - # - # For efficiency, we do this in batches by recursively pulling out all - # descendants of a batch until we find the non soft-failed/rejected - # events, i.e. the set of descendants whose chain of prev events back - # to the batch of extremities are all soft-failed or rejected. - # Typically, we won't find any such events as extremities will rarely - # have any descendants, but if they do then we should delete those - # extremities. - - def _cleanup_extremities_bg_update_txn(txn): - # The set of extremity event IDs that we're checking this round - original_set = set() - - # A dict[str, set[str]] of event ID to their prev events. - graph = {} - - # The set of descendants of the original set that are not rejected - # nor soft-failed. Ancestors of these events should be removed - # from the forward extremities table. - non_rejected_leaves = set() - - # Set of event IDs that have been soft failed, and for which we - # should check if they have descendants which haven't been soft - # failed. - soft_failed_events_to_lookup = set() - - # First, we get `batch_size` events from the table, pulling out - # their successor events, if any, and their successor events - # rejection status. - txn.execute( - """SELECT prev_event_id, event_id, internal_metadata, - rejections.event_id IS NOT NULL, events.outlier - FROM ( - SELECT event_id AS prev_event_id - FROM _extremities_to_check - LIMIT ? - ) AS f - LEFT JOIN event_edges USING (prev_event_id) - LEFT JOIN events USING (event_id) - LEFT JOIN event_json USING (event_id) - LEFT JOIN rejections USING (event_id) - """, (batch_size,) - ) - - for prev_event_id, event_id, metadata, rejected, outlier in txn: - original_set.add(prev_event_id) - - if not event_id or outlier: - # Common case where the forward extremity doesn't have any - # descendants. - continue - - graph.setdefault(event_id, set()).add(prev_event_id) - - soft_failed = False - if metadata: - soft_failed = json.loads(metadata).get("soft_failed") - - if soft_failed or rejected: - soft_failed_events_to_lookup.add(event_id) - else: - non_rejected_leaves.add(event_id) - - # Now we recursively check all the soft-failed descendants we - # found above in the same way, until we have nothing left to - # check. - while soft_failed_events_to_lookup: - # We only want to do 100 at a time, so we split given list - # into two. - batch = list(soft_failed_events_to_lookup) - to_check, to_defer = batch[:100], batch[100:] - soft_failed_events_to_lookup = set(to_defer) - - sql = """SELECT prev_event_id, event_id, internal_metadata, - rejections.event_id IS NOT NULL - FROM event_edges - INNER JOIN events USING (event_id) - INNER JOIN event_json USING (event_id) - LEFT JOIN rejections USING (event_id) - WHERE - prev_event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" for _ in to_check), - ) - txn.execute(sql, to_check) - - for prev_event_id, event_id, metadata, rejected in txn: - if event_id in graph: - # Already handled this event previously, but we still - # want to record the edge. - graph[event_id].add(prev_event_id) - continue - - graph[event_id] = {prev_event_id} - - soft_failed = json.loads(metadata).get("soft_failed") - if soft_failed or rejected: - soft_failed_events_to_lookup.add(event_id) - else: - non_rejected_leaves.add(event_id) - - # We have a set of non-soft-failed descendants, so we recurse up - # the graph to find all ancestors and add them to the set of event - # IDs that we can delete from forward extremities table. - to_delete = set() - while non_rejected_leaves: - event_id = non_rejected_leaves.pop() - prev_event_ids = graph.get(event_id, set()) - non_rejected_leaves.update(prev_event_ids) - to_delete.update(prev_event_ids) - - to_delete.intersection_update(original_set) - - deleted = self._simple_delete_many_txn( - txn=txn, - table="event_forward_extremities", - column="event_id", - iterable=to_delete, - keyvalues={}, - ) - - logger.info( - "Deleted %d forward extremities of %d checked, to clean up #5269", - deleted, - len(original_set), - ) - - if deleted: - # We now need to invalidate the caches of these rooms - rows = self._simple_select_many_txn( - txn, - table="events", - column="event_id", - iterable=to_delete, - keyvalues={}, - retcols=("room_id",) - ) - room_ids = set(row["room_id"] for row in rows) - for room_id in room_ids: - txn.call_after( - self.get_latest_event_ids_in_room.invalidate, - (room_id,) - ) - - self._simple_delete_many_txn( - txn=txn, - table="_extremities_to_check", - column="event_id", - iterable=original_set, - keyvalues={}, - ) - - return len(original_set) - - num_handled = yield self.runInteraction( - "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, - ) - - if not num_handled: - yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) - - def _drop_table_txn(txn): - txn.execute("DROP TABLE _extremities_to_check") - - yield self.runInteraction( - "_cleanup_extremities_bg_update_drop_table", - _drop_table_txn, - ) - - defer.returnValue(num_handled) - AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py new file mode 100644 index 0000000000..2eba106abf --- /dev/null +++ b/synapse/storage/events_bg_updates.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from six import text_type + +from canonicaljson import json + +from twisted.internet import defer + +from synapse.storage.background_updates import BackgroundUpdateStore + +logger = logging.getLogger(__name__) + + +class EventsBackgroundUpdatesStore(BackgroundUpdateStore): + + EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" + EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" + EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" + + def __init__(self, db_conn, hs): + super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs) + + self.register_background_update_handler( + self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts + ) + self.register_background_update_handler( + self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, + self._background_reindex_fields_sender, + ) + + self.register_background_index_update( + "event_contains_url_index", + index_name="event_contains_url_index", + table="events", + columns=["room_id", "topological_ordering", "stream_ordering"], + where_clause="contains_url = true AND outlier = false", + ) + + # an event_id index on event_search is useful for the purge_history + # api. Plus it means we get to enforce some integrity with a UNIQUE + # clause + self.register_background_index_update( + "event_search_event_id_idx", + index_name="event_search_event_id_idx", + table="event_search", + columns=["event_id"], + unique=True, + psql_only=True, + ) + + self.register_background_update_handler( + self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self._cleanup_extremities_bg_update, + ) + + @defer.inlineCallbacks + def _background_reindex_fields_sender(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_txn(txn): + sql = ( + "SELECT stream_ordering, event_id, json FROM events" + " INNER JOIN event_json USING (event_id)" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + + update_rows = [] + for row in rows: + try: + event_id = row[1] + event_json = json.loads(row[2]) + sender = event_json["sender"] + content = event_json["content"] + + contains_url = "url" in content + if contains_url: + contains_url &= isinstance(content["url"], text_type) + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + update_rows.append((sender, contains_url, event_id)) + + sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" + + for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): + clump = update_rows[index : index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows), + } + + self._background_update_progress_txn( + txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress + ) + + return len(rows) + + result = yield self.runInteraction( + self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _background_reindex_origin_server_ts(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_ordering, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + event_ids = [row[1] for row in rows] + + rows_to_update = [] + + chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] + for chunk in chunks: + ev_rows = self._simple_select_many_txn( + txn, + table="event_json", + column="event_id", + iterable=chunk, + retcols=["event_id", "json"], + keyvalues={}, + ) + + for row in ev_rows: + event_id = row["event_id"] + event_json = json.loads(row["json"]) + try: + origin_server_ts = event_json["origin_server_ts"] + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + rows_to_update.append((origin_server_ts, event_id)) + + sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" + + for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): + clump = rows_to_update[index : index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows_to_update), + } + + self._background_update_progress_txn( + txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress + ) + + return len(rows_to_update) + + result = yield self.runInteraction( + self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _cleanup_extremities_bg_update(self, progress, batch_size): + """Background update to clean out extremities that should have been + deleted previously. + + Mainly used to deal with the aftermath of #5269. + """ + + # This works by first copying all existing forward extremities into the + # `_extremities_to_check` table at start up, and then checking each + # event in that table whether we have any descendants that are not + # soft-failed/rejected. If that is the case then we delete that event + # from the forward extremities table. + # + # For efficiency, we do this in batches by recursively pulling out all + # descendants of a batch until we find the non soft-failed/rejected + # events, i.e. the set of descendants whose chain of prev events back + # to the batch of extremities are all soft-failed or rejected. + # Typically, we won't find any such events as extremities will rarely + # have any descendants, but if they do then we should delete those + # extremities. + + def _cleanup_extremities_bg_update_txn(txn): + # The set of extremity event IDs that we're checking this round + original_set = set() + + # A dict[str, set[str]] of event ID to their prev events. + graph = {} + + # The set of descendants of the original set that are not rejected + # nor soft-failed. Ancestors of these events should be removed + # from the forward extremities table. + non_rejected_leaves = set() + + # Set of event IDs that have been soft failed, and for which we + # should check if they have descendants which haven't been soft + # failed. + soft_failed_events_to_lookup = set() + + # First, we get `batch_size` events from the table, pulling out + # their successor events, if any, and their successor events + # rejection status. + txn.execute( + """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL, events.outlier + FROM ( + SELECT event_id AS prev_event_id + FROM _extremities_to_check + LIMIT ? + ) AS f + LEFT JOIN event_edges USING (prev_event_id) + LEFT JOIN events USING (event_id) + LEFT JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + """, (batch_size,) + ) + + for prev_event_id, event_id, metadata, rejected, outlier in txn: + original_set.add(prev_event_id) + + if not event_id or outlier: + # Common case where the forward extremity doesn't have any + # descendants. + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = False + if metadata: + soft_failed = json.loads(metadata).get("soft_failed") + + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # Now we recursively check all the soft-failed descendants we + # found above in the same way, until we have nothing left to + # check. + while soft_failed_events_to_lookup: + # We only want to do 100 at a time, so we split given list + # into two. + batch = list(soft_failed_events_to_lookup) + to_check, to_defer = batch[:100], batch[100:] + soft_failed_events_to_lookup = set(to_defer) + + sql = """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + INNER JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + WHERE + prev_event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_check), + ) + txn.execute(sql, to_check) + + for prev_event_id, event_id, metadata, rejected in txn: + if event_id in graph: + # Already handled this event previously, but we still + # want to record the edge. + graph[event_id].add(prev_event_id) + continue + + graph[event_id] = {prev_event_id} + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # We have a set of non-soft-failed descendants, so we recurse up + # the graph to find all ancestors and add them to the set of event + # IDs that we can delete from forward extremities table. + to_delete = set() + while non_rejected_leaves: + event_id = non_rejected_leaves.pop() + prev_event_ids = graph.get(event_id, set()) + non_rejected_leaves.update(prev_event_ids) + to_delete.update(prev_event_ids) + + to_delete.intersection_update(original_set) + + deleted = self._simple_delete_many_txn( + txn=txn, + table="event_forward_extremities", + column="event_id", + iterable=to_delete, + keyvalues={}, + ) + + logger.info( + "Deleted %d forward extremities of %d checked, to clean up #5269", + deleted, + len(original_set), + ) + + if deleted: + # We now need to invalidate the caches of these rooms + rows = self._simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",) + ) + room_ids = set(row["room_id"] for row in rows) + for room_id in room_ids: + txn.call_after( + self.get_latest_event_ids_in_room.invalidate, + (room_id,) + ) + + self._simple_delete_many_txn( + txn=txn, + table="_extremities_to_check", + column="event_id", + iterable=original_set, + keyvalues={}, + ) + + return len(original_set) + + num_handled = yield self.runInteraction( + "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, + ) + + if not num_handled: + yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + + def _drop_table_txn(txn): + txn.execute("DROP TABLE _extremities_to_check") + + yield self.runInteraction( + "_cleanup_extremities_bg_update_drop_table", + _drop_table_txn, + ) + + defer.returnValue(num_handled) -- cgit 1.5.1 From 468bd090ff354f27597e08b54a969f22afbfad43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 11:24:42 +0100 Subject: Rename constant --- synapse/storage/events_bg_updates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 2eba106abf..22aac1393d 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -30,7 +30,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" - EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" + DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs) @@ -64,7 +64,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) self.register_background_update_handler( - self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update, ) @@ -388,7 +388,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) if not num_handled: - yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + yield self._end_background_update(self.DELETE_SOFT_FAILED_EXTREMITIES) def _drop_table_txn(txn): txn.execute("DROP TABLE _extremities_to_check") -- cgit 1.5.1 From cb967e2346096d7e647c757e3b57093549746f14 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:06:42 +0100 Subject: Update synapse/storage/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 22aac1393d..75c1935bf3 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -255,7 +255,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): soft_failed_events_to_lookup = set() # First, we get `batch_size` events from the table, pulling out - # their successor events, if any, and their successor events + # their successor events, if any, and the successor events' # rejection status. txn.execute( """SELECT prev_event_id, event_id, internal_metadata, -- cgit 1.5.1 From 6cdfb0207e2de72286a7a8d3b3c417c2808e90dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:54:56 +0100 Subject: Add index to temp table --- synapse/storage/schema/delta/54/delete_forward_extremities.sql | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index aa40f13da7..b062ec840c 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -20,3 +20,4 @@ INSERT INTO background_updates (update_name, progress_json) VALUES DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; +CREATE INDEX _extremities_to_check_id ON _extremities_to_check(event_id); -- cgit 1.5.1 From 54d50fbfdf8c39d92c36291a572419cee6b9b916 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:15:13 +0100 Subject: Get events all at once --- synapse/storage/stats.py | 59 +++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index eb0ced5b5e..99b4af5555 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -179,46 +179,39 @@ class StatsStore(StateDeltasStore): current_state_ids = yield self.get_current_state_ids(room_id) - join_rules = yield self.get_event( - current_state_ids.get((EventTypes.JoinRules, "")), allow_none=True + join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) + history_visibility_id = current_state_ids.get( + (EventTypes.RoomHistoryVisibility, "") ) - history_visibility = yield self.get_event( - current_state_ids.get((EventTypes.RoomHistoryVisibility, "")), - allow_none=True, - ) - encryption = yield self.get_event( - current_state_ids.get((EventTypes.RoomEncryption, "")), allow_none=True - ) - name = yield self.get_event( - current_state_ids.get((EventTypes.Name, "")), allow_none=True - ) - topic = yield self.get_event( - current_state_ids.get((EventTypes.Topic, "")), allow_none=True - ) - avatar = yield self.get_event( - current_state_ids.get((EventTypes.RoomAvatar, "")), allow_none=True - ) - canonical_alias = yield self.get_event( - current_state_ids.get((EventTypes.CanonicalAlias, "")), allow_none=True - ) - - def _or_none(x, arg): - if x: - return x.content.get(arg) + encryption_id = current_state_ids.get((EventTypes.RoomEncryption, "")) + name_id = current_state_ids.get((EventTypes.Name, "")) + topic_id = current_state_ids.get((EventTypes.Topic, "")) + avatar_id = current_state_ids.get((EventTypes.RoomAvatar, "")) + canonical_alias_id = current_state_ids.get((EventTypes.CanonicalAlias, "")) + + state_events = yield self.get_events([ + join_rules_id, history_visibility_id, encryption_id, name_id, + topic_id, avatar_id, canonical_alias_id, + ]) + + def _get_or_none(event_id, arg): + event = state_events.get(event_id) + if event: + return event.content.get(arg) return None yield self.update_room_state( room_id, { - "join_rules": _or_none(join_rules, "join_rule"), - "history_visibility": _or_none( - history_visibility, "history_visibility" + "join_rules": _get_or_none(join_rules_id, "join_rule"), + "history_visibility": _get_or_none( + history_visibility_id, "history_visibility" ), - "encryption": _or_none(encryption, "algorithm"), - "name": _or_none(name, "name"), - "topic": _or_none(topic, "topic"), - "avatar": _or_none(avatar, "url"), - "canonical_alias": _or_none(canonical_alias, "alias"), + "encryption": _get_or_none(encryption_id, "algorithm"), + "name": _get_or_none(name_id, "name"), + "topic": _get_or_none(topic_id, "topic"), + "avatar": _get_or_none(avatar_id, "url"), + "canonical_alias": _get_or_none(canonical_alias_id, "alias"), }, ) -- cgit 1.5.1 From 04710cc2d71127b1f416e87f7a4aea3ce6d93410 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:22:32 +0100 Subject: Fetch membership counts all at once --- synapse/storage/roommember.py | 33 +++++++++++---------------------- synapse/storage/stats.py | 23 +++++++---------------- 2 files changed, 18 insertions(+), 38 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4bd1669458..7617913326 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -142,26 +142,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return self.runInteraction("get_room_summary", _get_room_summary_txn) - def _get_user_count_in_room_txn(self, txn, room_id, membership): + def _get_user_counts_in_room_txn(self, txn, room_id): """ - See get_user_count_in_room. - """ - sql = ( - "SELECT count(*) FROM room_memberships as m" - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id " - " AND m.room_id = c.room_id " - " AND m.user_id = c.state_key" - " WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?" - ) - - txn.execute(sql, (room_id, membership)) - row = txn.fetchone() - return row[0] - - def get_user_count_in_room(self, room_id, membership): - """ - Get the user count in a room with a particular membership. + Get the user count in a room by membership. Args: room_id (str) @@ -170,9 +153,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): Returns: Deferred[int] """ - return self.runInteraction( - "get_users_in_room", self._get_user_count_in_room_txn, room_id, membership - ) + sql = """ + SELECT m.membership, count(*) FROM room_memberships as m + INNER JOIN current_state_events as c USING(event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? + GROUP BY m.membership + """ + + txn.execute(sql, (room_id,)) + return {row[0]: row[1] for row in txn} @cached() def get_invited_rooms_for_user(self, user_id): diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 99b4af5555..727f60b3bd 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -226,18 +226,9 @@ class StatsStore(StateDeltasStore): current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn) current_state_events = len(current_state_ids) - joined_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.JOIN - ) - invited_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.INVITE - ) - left_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.LEAVE - ) - banned_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.BAN - ) + + membership_counts = self._get_user_counts_in_room_txn(txn, room_id) + total_state_events = self._get_total_state_event_counts_txn( txn, room_id ) @@ -250,10 +241,10 @@ class StatsStore(StateDeltasStore): { "bucket_size": self.stats_bucket_size, "current_state_events": current_state_events, - "joined_members": joined_members, - "invited_members": invited_members, - "left_members": left_members, - "banned_members": banned_members, + "joined_members": membership_counts.get(Membership.JOIN, 0), + "invited_members": membership_counts.get(Membership.INVITE, 0), + "left_members": membership_counts.get(Membership.LEAVE, 0), + "banned_members": membership_counts.get(Membership.BAN, 0), "state_events": total_state_events, }, ) -- cgit 1.5.1 From e2c46ed851599dc08cc8a822e07c0d4f9a050ee2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:26:38 +0100 Subject: Move deletion from table inside txn --- synapse/storage/stats.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 727f60b3bd..a99637d4b4 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -254,10 +254,13 @@ class StatsStore(StateDeltasStore): {"room_id": room_id, "token": current_token}, ) + # We've finished a room. Delete it from the table. + self._simple_delete_one_txn( + txn, TEMP_TABLE + "_rooms", {"room_id": room_id}, + ) + yield self.runInteraction("update_room_stats", _fetch_data) - # We've finished a room. Delete it from the table. - yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) # Update the remaining counter. progress["remaining"] -= 1 yield self.runInteraction( -- cgit 1.5.1 From 5ac75fc9a2d80ddf2974d281c381f82515606403 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:26:55 +0100 Subject: Join against events to use its room_id index --- synapse/storage/events_worker.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index b56c83e460..1782428048 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -618,7 +618,12 @@ class EventsWorkerStore(SQLBaseStore): """ See get_total_state_event_counts. """ - sql = "SELECT COUNT(*) FROM state_events WHERE room_id=?" + # We join against the events table as that has an index on room_id + sql = """ + SELECT COUNT(*) FROM state_events + INNER JOIN events USING (room_id, event_id) + WHERE room_id=? + """ txn.execute(sql, (room_id,)) row = txn.fetchone() return row[0] if row else 0 -- cgit 1.5.1 From 847b9dcd1c9d7d7a43333e85f69dc78471095475 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 31 May 2019 09:54:46 +0100 Subject: Make max_delta equal to period * 10% --- synapse/config/registration.py | 15 ++++----------- synapse/storage/_base.py | 7 +++---- tests/rest/client/v2_alpha/test_register.py | 18 +----------------- 3 files changed, 8 insertions(+), 32 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/config/registration.py b/synapse/config/registration.py index b4fd4af368..1835b4b1f3 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -39,9 +39,7 @@ class AccountValidityConfig(Config): else: self.renew_email_subject = "Renew your %(app)s account" - self.startup_job_max_delta = self.parse_duration( - config.get("startup_job_max_delta", 0), - ) + self.startup_job_max_delta = self.period * 10. / 100. if self.renew_by_email_enabled and "public_baseurl" not in synapse_config: raise ConfigError("Can't send renewal emails without 'public_baseurl'") @@ -133,20 +131,15 @@ class RegistrationConfig(Config): # This means that, if a validity period is set, and Synapse is restarted (it will # then derive an expiration date from the current validity period), and some time # after that the validity period changes and Synapse is restarted, the users' - # expiration dates won't be updated unless their account is manually renewed. - # - # If set, the ``startup_job_max_delta`` optional setting will make the startup job - # described above set a random expiration date between t + period and - # t + period + startup_job_max_delta, t being the date and time at which the job - # sets the expiration date for a given user. This is useful for server admins that - # want to avoid Synapse sending a lot of renewal emails at once. + # expiration dates won't be updated unless their account is manually renewed. This + # date will be randomly selected within a range [now + period ; now + period + d], + # where d is equal to 10% of the validity period. # #account_validity: # enabled: True # period: 6w # renew_at: 1w # renew_email_subject: "Renew your %%(app)s account" - # startup_job_max_delta: 2d # The user must provide all of the below types of 3PID when registering. # diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 40802fd3dc..7f944ec717 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -329,14 +329,13 @@ class SQLBaseStore(object): user_id (str): User ID to set an expiration date for. use_delta (bool): If set to False, the expiration date for the user will be now + validity period. If set to True, this expiration date will be a - random value in the [now + period; now + period + max_delta] range, - max_delta being the configured value for the size of the range, unless - delta is 0, in which case it sets it to now + period. + random value in the [now + period; now + period + d] range, d being a + delta equal to 10% of the validity period. """ now_ms = self._clock.time_msec() expiration_ts = now_ms + self._account_validity.period - if use_delta and self._account_validity.startup_job_max_delta: + if use_delta: expiration_ts = self.rand.randrange( expiration_ts, expiration_ts + self._account_validity.startup_job_max_delta, diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 68654e25ab..711628ded1 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -436,7 +436,7 @@ class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): self.validity_period = 10 - self.max_delta = 10 + self.max_delta = self.validity_period * 10. / 100. config = self.default_config() @@ -453,22 +453,6 @@ class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): return self.hs def test_background_job(self): - """ - Tests whether the account validity startup background job does the right thing, - which is sticking an expiration date to every account that doesn't already have - one. - """ - user_id = self.register_user("kermit", "user") - - self.hs.config.account_validity.startup_job_max_delta = 0 - - now_ms = self.hs.clock.time_msec() - self.get_success(self.store._set_expiration_date_when_missing()) - - res = self.get_success(self.store.get_expiration_ts_for_user(user_id)) - self.assertEqual(res, now_ms + self.validity_period) - - def test_background_job_with_max_delta(self): """ Tests the same thing as test_background_job, except that it sets the startup_job_max_delta parameter and checks that the expiration date is within the -- cgit 1.5.1 From 5037326d6624d1d1780a0536d19ff79e275f8735 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:37:57 +0100 Subject: Add indices. Remove room_ids accidentally added We have to do this by re-inserting a background update and recreating tables, as the tables only get created during a background update and will later be deleted. We also make sure that we remove any entries that should have been removed but weren't due to a race that has been fixed in a previous commit. --- synapse/storage/schema/delta/54/stats2.sql | 28 ++++++++++++++++++++ synapse/storage/stats.py | 41 ++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 13 deletions(-) create mode 100644 synapse/storage/schema/delta/54/stats2.sql (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/54/stats2.sql b/synapse/storage/schema/delta/54/stats2.sql new file mode 100644 index 0000000000..3b2d48447f --- /dev/null +++ b/synapse/storage/schema/delta/54/stats2.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- This delta file gets run after `54/stats.sql` delta. + +-- We want to add some indices to the temporary stats table, so we re-insert +-- 'populate_stats_createtables' if we are still processing the rooms update. +INSERT INTO background_updates (update_name, progress_json) + SELECT 'populate_stats_createtables', '{}' + WHERE + 'populate_stats_process_rooms' IN ( + SELECT update_name FROM background_updates + ) + AND 'populate_stats_createtables' NOT IN ( -- don't insert if already exists + SELECT update_name FROM background_updates + ); diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index a99637d4b4..1c0b183a56 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -18,6 +18,7 @@ import logging from twisted.internet import defer from synapse.api.constants import EventTypes, Membership +from synapse.storage.prepare_database import get_statements from synapse.storage.state_deltas import StateDeltasStore from synapse.util.caches.descriptors import cached @@ -69,12 +70,25 @@ class StatsStore(StateDeltasStore): # Get all the rooms that we want to process. def _make_staging_area(txn): - sql = ( - "CREATE TABLE IF NOT EXISTS " - + TEMP_TABLE - + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" - ) - txn.execute(sql) + # Create the temporary tables + stmts = get_statements(""" + -- We just recreate the table, we'll be reinserting the + -- correct entries again later anyway. + DROP TABLE IF EXISTS {temp}_rooms; + + CREATE TABLE IF NOT EXISTS {temp}_rooms( + room_id TEXT NOT NULL, + events BIGINT NOT NULL + ); + + CREATE INDEX {temp}_rooms_events + ON {temp}_rooms(events); + CREATE INDEX {temp}_rooms_id + ON {temp}_rooms(room_id); + """.format(temp=TEMP_TABLE).splitlines()) + + for statement in stmts: + txn.execute(statement) sql = ( "CREATE TABLE IF NOT EXISTS " @@ -83,15 +97,16 @@ class StatsStore(StateDeltasStore): ) txn.execute(sql) - # Get rooms we want to process from the database + # Get rooms we want to process from the database, only adding + # those that we haven't (i.e. those not in room_stats_earliest_token) sql = """ - SELECT room_id, count(*) FROM current_state_events - GROUP BY room_id - """ + INSERT INTO %s_rooms (room_id, events) + SELECT c.room_id, count(*) FROM current_state_events AS c + LEFT JOIN room_stats_earliest_token AS t USING (room_id) + WHERE t.room_id IS NULL + GROUP BY c.room_id + """ % (TEMP_TABLE,) txn.execute(sql) - rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] - self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) - del rooms new_pos = yield self.get_max_stream_id_in_current_state_deltas() yield self.runInteraction("populate_stats_temp_build", _make_staging_area) -- cgit 1.5.1 From 4d794dae210ce30e87d8a6b9ee2f9b481cadf539 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 31 May 2019 11:09:34 +0100 Subject: Move delta from +10% to -10% --- synapse/config/registration.py | 2 +- synapse/storage/_base.py | 4 ++-- tests/rest/client/v2_alpha/test_register.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 4af825a2ab..aad3400819 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -132,7 +132,7 @@ class RegistrationConfig(Config): # then derive an expiration date from the current validity period), and some time # after that the validity period changes and Synapse is restarted, the users' # expiration dates won't be updated unless their account is manually renewed. This - # date will be randomly selected within a range [now + period ; now + period + d], + # date will be randomly selected within a range [now + period - d ; now + period], # where d is equal to 10%% of the validity period. # #account_validity: diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 7f944ec717..086318a530 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -329,7 +329,7 @@ class SQLBaseStore(object): user_id (str): User ID to set an expiration date for. use_delta (bool): If set to False, the expiration date for the user will be now + validity period. If set to True, this expiration date will be a - random value in the [now + period; now + period + d] range, d being a + random value in the [now + period - d ; now + period] range, d being a delta equal to 10% of the validity period. """ now_ms = self._clock.time_msec() @@ -337,8 +337,8 @@ class SQLBaseStore(object): if use_delta: expiration_ts = self.rand.randrange( + expiration_ts - self._account_validity.startup_job_max_delta, expiration_ts, - expiration_ts + self._account_validity.startup_job_max_delta, ) self._simple_insert_txn( diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 711628ded1..0cb6a363d6 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -467,5 +467,5 @@ class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): res = self.get_success(self.store.get_expiration_ts_for_user(user_id)) - self.assertLessEqual(res, now_ms + self.validity_period + self.max_delta) - self.assertGreaterEqual(res, now_ms + self.validity_period) + self.assertGreaterEqual(res, now_ms + self.validity_period - self.max_delta) + self.assertLessEqual(res, now_ms + self.validity_period) -- cgit 1.5.1 From 37057d5d6047a7f984fc9f1db094b9169a4e4c73 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 3 Jun 2019 22:02:47 +1000 Subject: prepare --- synapse/storage/prepare_database.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index c1711bc8bd..07478b6672 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -20,6 +20,8 @@ import logging import os import re +from synapse.storage.engines.postgres import PostgresEngine + logger = logging.getLogger(__name__) @@ -115,8 +117,16 @@ def _setup_new_database(cur, database_engine): valid_dirs = [] pattern = re.compile(r"^\d+(\.sql)?$") + + if isinstance(database_engine, PostgresEngine): + specific = "postgres" + else: + specific = "sqlite" + + specific_pattern = re.compile(r"^\d+(\.sql." + specific + r")?$") + for filename in directory_entries: - match = pattern.match(filename) + match = pattern.match(filename) or specific_pattern.match(filename) abs_path = os.path.join(current_dir, filename) if match and os.path.isdir(abs_path): ver = int(match.group(0)) @@ -136,7 +146,9 @@ def _setup_new_database(cur, database_engine): directory_entries = os.listdir(sql_dir) - for filename in fnmatch.filter(directory_entries, "*.sql"): + for filename in fnmatch.filter(directory_entries, "*.sql") + fnmatch.filter( + directory_entries, "*.sql." + specific + ): sql_loc = os.path.join(sql_dir, filename) logger.debug("Applying schema %s", sql_loc) executescript(cur, sql_loc) -- cgit 1.5.1 From dc72b90cd674f69fea1d27a1c1dab60a60d5ab9d Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 3 Jun 2019 22:03:28 +1000 Subject: full schema --- .../schema/full_schemas/54/full.sql.postgres | 2040 ++++++++++++++++++++ .../storage/schema/full_schemas/54/full.sql.sqlite | 261 +++ synapse/storage/schema/full_schemas/README.txt | 14 + 3 files changed, 2315 insertions(+) create mode 100644 synapse/storage/schema/full_schemas/54/full.sql.postgres create mode 100644 synapse/storage/schema/full_schemas/54/full.sql.sqlite create mode 100644 synapse/storage/schema/full_schemas/README.txt (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/full.sql.postgres b/synapse/storage/schema/full_schemas/54/full.sql.postgres new file mode 100644 index 0000000000..5fb54cfe77 --- /dev/null +++ b/synapse/storage/schema/full_schemas/54/full.sql.postgres @@ -0,0 +1,2040 @@ + + + + + +CREATE TABLE _extremities_to_check ( + event_id text +); + + + +CREATE TABLE access_tokens ( + id bigint NOT NULL, + user_id text NOT NULL, + device_id text, + token text NOT NULL, + last_used bigint +); + + + +CREATE TABLE account_data ( + user_id text NOT NULL, + account_data_type text NOT NULL, + stream_id bigint NOT NULL, + content text NOT NULL +); + + + +CREATE TABLE account_data_max_stream_id ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint NOT NULL, + CONSTRAINT private_user_data_max_stream_id_lock_check CHECK ((lock = 'X'::bpchar)) +); + + + +CREATE TABLE account_validity ( + user_id text NOT NULL, + expiration_ts_ms bigint NOT NULL, + email_sent boolean NOT NULL, + renewal_token text +); + + + +CREATE TABLE application_services_state ( + as_id text NOT NULL, + state character varying(5), + last_txn integer +); + + + +CREATE TABLE application_services_txns ( + as_id text NOT NULL, + txn_id integer NOT NULL, + event_ids text NOT NULL +); + + + +CREATE TABLE applied_module_schemas ( + module_name text NOT NULL, + file text NOT NULL +); + + + +CREATE TABLE applied_schema_deltas ( + version integer NOT NULL, + file text NOT NULL +); + + + +CREATE TABLE appservice_room_list ( + appservice_id text NOT NULL, + network_id text NOT NULL, + room_id text NOT NULL +); + + + +CREATE TABLE appservice_stream_position ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_ordering bigint, + CONSTRAINT appservice_stream_position_lock_check CHECK ((lock = 'X'::bpchar)) +); + + + +CREATE TABLE background_updates ( + update_name text NOT NULL, + progress_json text NOT NULL, + depends_on text +); + + + +CREATE TABLE blocked_rooms ( + room_id text NOT NULL, + user_id text NOT NULL +); + + + +CREATE TABLE cache_invalidation_stream ( + stream_id bigint, + cache_func text, + keys text[], + invalidation_ts bigint +); + + + +CREATE TABLE current_state_delta_stream ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + event_id text, + prev_event_id text +); + + + +CREATE TABLE current_state_events ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL +); + + + +CREATE TABLE deleted_pushers ( + stream_id bigint NOT NULL, + app_id text NOT NULL, + pushkey text NOT NULL, + user_id text NOT NULL +); + + + +CREATE TABLE destinations ( + destination text NOT NULL, + retry_last_ts bigint, + retry_interval integer +); + + + +CREATE TABLE device_federation_inbox ( + origin text NOT NULL, + message_id text NOT NULL, + received_ts bigint NOT NULL +); + + + +CREATE TABLE device_federation_outbox ( + destination text NOT NULL, + stream_id bigint NOT NULL, + queued_ts bigint NOT NULL, + messages_json text NOT NULL +); + + + +CREATE TABLE device_inbox ( + user_id text NOT NULL, + device_id text NOT NULL, + stream_id bigint NOT NULL, + message_json text NOT NULL +); + + + +CREATE TABLE device_lists_outbound_last_success ( + destination text NOT NULL, + user_id text NOT NULL, + stream_id bigint NOT NULL +); + + + +CREATE TABLE device_lists_outbound_pokes ( + destination text NOT NULL, + stream_id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL, + sent boolean NOT NULL, + ts bigint NOT NULL +); + + + +CREATE TABLE device_lists_remote_cache ( + user_id text NOT NULL, + device_id text NOT NULL, + content text NOT NULL +); + + + +CREATE TABLE device_lists_remote_extremeties ( + user_id text NOT NULL, + stream_id text NOT NULL +); + + + +CREATE TABLE device_lists_stream ( + stream_id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL +); + + + +CREATE TABLE device_max_stream_id ( + stream_id bigint NOT NULL +); + + + +CREATE TABLE devices ( + user_id text NOT NULL, + device_id text NOT NULL, + display_name text +); + + + +CREATE TABLE e2e_device_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + ts_added_ms bigint NOT NULL, + key_json text NOT NULL +); + + + +CREATE TABLE e2e_one_time_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + algorithm text NOT NULL, + key_id text NOT NULL, + ts_added_ms bigint NOT NULL, + key_json text NOT NULL +); + + + +CREATE TABLE e2e_room_keys ( + user_id text NOT NULL, + room_id text NOT NULL, + session_id text NOT NULL, + version bigint NOT NULL, + first_message_index integer, + forwarded_count integer, + is_verified boolean, + session_data text NOT NULL +); + + + +CREATE TABLE e2e_room_keys_versions ( + user_id text NOT NULL, + version bigint NOT NULL, + algorithm text NOT NULL, + auth_data text NOT NULL, + deleted smallint DEFAULT 0 NOT NULL +); + + + +CREATE TABLE erased_users ( + user_id text NOT NULL +); + + + +CREATE TABLE event_auth ( + event_id text NOT NULL, + auth_id text NOT NULL, + room_id text NOT NULL +); + + + +CREATE TABLE event_backward_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); + + + +CREATE TABLE event_edges ( + event_id text NOT NULL, + prev_event_id text NOT NULL, + room_id text NOT NULL, + is_state boolean NOT NULL +); + + + +CREATE TABLE event_forward_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); + + + +CREATE TABLE event_json ( + event_id text NOT NULL, + room_id text NOT NULL, + internal_metadata text NOT NULL, + json text NOT NULL, + format_version integer +); + + + +CREATE TABLE event_push_actions ( + room_id text NOT NULL, + event_id text NOT NULL, + user_id text NOT NULL, + profile_tag character varying(32), + actions text NOT NULL, + topological_ordering bigint, + stream_ordering bigint, + notif smallint, + highlight smallint +); + + + +CREATE TABLE event_push_actions_staging ( + event_id text NOT NULL, + user_id text NOT NULL, + actions text NOT NULL, + notif smallint NOT NULL, + highlight smallint NOT NULL +); + + + +CREATE TABLE event_push_summary ( + user_id text NOT NULL, + room_id text NOT NULL, + notif_count bigint NOT NULL, + stream_ordering bigint NOT NULL +); + + + +CREATE TABLE event_push_summary_stream_ordering ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_ordering bigint NOT NULL, + CONSTRAINT event_push_summary_stream_ordering_lock_check CHECK ((lock = 'X'::bpchar)) +); + + + +CREATE TABLE event_reference_hashes ( + event_id text, + algorithm text, + hash bytea +); + + + +CREATE TABLE event_relations ( + event_id text NOT NULL, + relates_to_id text NOT NULL, + relation_type text NOT NULL, + aggregation_key text +); + + + +CREATE TABLE event_reports ( + id bigint NOT NULL, + received_ts bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL, + user_id text NOT NULL, + reason text, + content text +); + + + +CREATE TABLE event_search ( + event_id text, + room_id text, + sender text, + key text, + vector tsvector, + origin_server_ts bigint, + stream_ordering bigint +); + + + +CREATE TABLE event_to_state_groups ( + event_id text NOT NULL, + state_group bigint NOT NULL +); + + + +CREATE TABLE events ( + stream_ordering integer NOT NULL, + topological_ordering bigint NOT NULL, + event_id text NOT NULL, + type text NOT NULL, + room_id text NOT NULL, + content text, + unrecognized_keys text, + processed boolean NOT NULL, + outlier boolean NOT NULL, + depth bigint DEFAULT 0 NOT NULL, + origin_server_ts bigint, + received_ts bigint, + sender text, + contains_url boolean +); + + + +CREATE TABLE ex_outlier_stream ( + event_stream_ordering bigint NOT NULL, + event_id text NOT NULL, + state_group bigint NOT NULL +); + + + +CREATE TABLE federation_stream_position ( + type text NOT NULL, + stream_id integer NOT NULL +); + + + +CREATE TABLE group_attestations_remote ( + group_id text NOT NULL, + user_id text NOT NULL, + valid_until_ms bigint NOT NULL, + attestation_json text NOT NULL +); + + + +CREATE TABLE group_attestations_renewals ( + group_id text NOT NULL, + user_id text NOT NULL, + valid_until_ms bigint NOT NULL +); + + + +CREATE TABLE group_invites ( + group_id text NOT NULL, + user_id text NOT NULL +); + + + +CREATE TABLE group_roles ( + group_id text NOT NULL, + role_id text NOT NULL, + profile text NOT NULL, + is_boolean NOT NULL +); + + + +CREATE TABLE group_room_categories ( + group_id text NOT NULL, + category_id text NOT NULL, + profile text NOT NULL, + is_boolean NOT NULL +); + + + +CREATE TABLE group_rooms ( + group_id text NOT NULL, + room_id text NOT NULL, + is_boolean NOT NULL +); + + + +CREATE TABLE group_summary_roles ( + group_id text NOT NULL, + role_id text NOT NULL, + role_order bigint NOT NULL, + CONSTRAINT group_summary_roles_role_order_check CHECK ((role_order > 0)) +); + + + +CREATE TABLE group_summary_room_categories ( + group_id text NOT NULL, + category_id text NOT NULL, + cat_order bigint NOT NULL, + CONSTRAINT group_summary_room_categories_cat_order_check CHECK ((cat_order > 0)) +); + + + +CREATE TABLE group_summary_rooms ( + group_id text NOT NULL, + room_id text NOT NULL, + category_id text NOT NULL, + room_order bigint NOT NULL, + is_boolean NOT NULL, + CONSTRAINT group_summary_rooms_room_order_check CHECK ((room_order > 0)) +); + + + +CREATE TABLE group_summary_users ( + group_id text NOT NULL, + user_id text NOT NULL, + role_id text NOT NULL, + user_order bigint NOT NULL, + is_boolean NOT NULL +); + + + +CREATE TABLE group_users ( + group_id text NOT NULL, + user_id text NOT NULL, + is_admin boolean NOT NULL, + is_boolean NOT NULL +); + + + +CREATE TABLE groups ( + group_id text NOT NULL, + name text, + avatar_url text, + short_description text, + long_description text, + is_boolean NOT NULL, + join_policy text DEFAULT 'invite'::text NOT NULL +); + + + +CREATE TABLE guest_access ( + event_id text NOT NULL, + room_id text NOT NULL, + guest_access text NOT NULL +); + + + +CREATE TABLE history_visibility ( + event_id text NOT NULL, + room_id text NOT NULL, + history_visibility text NOT NULL +); + + + +CREATE TABLE local_group_membership ( + group_id text NOT NULL, + user_id text NOT NULL, + is_admin boolean NOT NULL, + membership text NOT NULL, + is_sed boolean NOT NULL, + content text NOT NULL +); + + + +CREATE TABLE local_group_updates ( + stream_id bigint NOT NULL, + group_id text NOT NULL, + user_id text NOT NULL, + type text NOT NULL, + content text NOT NULL +); + + + +CREATE TABLE local_invites ( + stream_id bigint NOT NULL, + inviter text NOT NULL, + invitee text NOT NULL, + event_id text NOT NULL, + room_id text NOT NULL, + locally_rejected text, + replaced_by text +); + + + +CREATE TABLE local_media_repository ( + media_id text, + media_type text, + media_length integer, + created_ts bigint, + upload_name text, + user_id text, + quarantined_by text, + url_cache text, + last_access_ts bigint +); + + + +CREATE TABLE local_media_repository_thumbnails ( + media_id text, + thumbnail_width integer, + thumbnail_height integer, + thumbnail_type text, + thumbnail_method text, + thumbnail_length integer +); + + + +CREATE TABLE local_media_repository_url_cache ( + url text, + response_code integer, + etag text, + expires_ts bigint, + og text, + media_id text, + download_ts bigint +); + + + +CREATE TABLE monthly_active_users ( + user_id text NOT NULL, + "timestamp" bigint NOT NULL +); + + + +CREATE TABLE open_id_tokens ( + token text NOT NULL, + ts_valid_until_ms bigint NOT NULL, + user_id text NOT NULL +); + + + +CREATE TABLE presence ( + user_id text NOT NULL, + state character varying(20), + status_msg text, + mtime bigint +); + + + +CREATE TABLE presence_allow_inbound ( + observed_user_id text NOT NULL, + observer_user_id text NOT NULL +); + + + +CREATE TABLE presence_stream ( + stream_id bigint, + user_id text, + state text, + last_active_ts bigint, + last_federation_update_ts bigint, + last_user_sync_ts bigint, + status_msg text, + currently_active boolean +); + + + +CREATE TABLE profiles ( + user_id text NOT NULL, + displayname text, + avatar_url text +); + + + +CREATE TABLE room_list_stream ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + visibility boolean NOT NULL, + appservice_id text, + network_id text +); + + + +CREATE TABLE push_rules ( + id bigint NOT NULL, + user_name text NOT NULL, + rule_id text NOT NULL, + priority_class smallint NOT NULL, + priority integer DEFAULT 0 NOT NULL, + conditions text NOT NULL, + actions text NOT NULL +); + + + +CREATE TABLE push_rules_enable ( + id bigint NOT NULL, + user_name text NOT NULL, + rule_id text NOT NULL, + enabled smallint +); + + + +CREATE TABLE push_rules_stream ( + stream_id bigint NOT NULL, + event_stream_ordering bigint NOT NULL, + user_id text NOT NULL, + rule_id text NOT NULL, + op text NOT NULL, + priority_class smallint, + priority integer, + conditions text, + actions text +); + + + +CREATE TABLE pusher_throttle ( + pusher bigint NOT NULL, + room_id text NOT NULL, + last_sent_ts bigint, + throttle_ms bigint +); + + + +CREATE TABLE pushers ( + id bigint NOT NULL, + user_name text NOT NULL, + access_token bigint, + profile_tag text NOT NULL, + kind text NOT NULL, + app_id text NOT NULL, + app_display_name text NOT NULL, + device_display_name text NOT NULL, + pushkey text NOT NULL, + ts bigint NOT NULL, + lang text, + data text, + last_stream_ordering integer, + last_success bigint, + failing_since bigint +); + + + +CREATE TABLE ratelimit_override ( + user_id text NOT NULL, + messages_per_second bigint, + burst_count bigint +); + + + +CREATE TABLE receipts_graph ( + room_id text NOT NULL, + receipt_type text NOT NULL, + user_id text NOT NULL, + event_ids text NOT NULL, + data text NOT NULL +); + + + +CREATE TABLE receipts_linearized ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + receipt_type text NOT NULL, + user_id text NOT NULL, + event_id text NOT NULL, + data text NOT NULL +); + + + +CREATE TABLE received_transactions ( + transaction_id text, + origin text, + ts bigint, + response_code integer, + response_json bytea, + has_been_referenced smallint DEFAULT 0 +); + + + +CREATE TABLE redactions ( + event_id text NOT NULL, + redacts text NOT NULL +); + + + +CREATE TABLE rejections ( + event_id text NOT NULL, + reason text NOT NULL, + last_check text NOT NULL +); + + + +CREATE TABLE remote_media_cache ( + media_origin text, + media_id text, + media_type text, + created_ts bigint, + upload_name text, + media_length integer, + filesystem_id text, + last_access_ts bigint, + quarantined_by text +); + + + +CREATE TABLE remote_media_cache_thumbnails ( + media_origin text, + media_id text, + thumbnail_width integer, + thumbnail_height integer, + thumbnail_method text, + thumbnail_type text, + thumbnail_length integer, + filesystem_id text +); + + + +CREATE TABLE remote_profile_cache ( + user_id text NOT NULL, + displayname text, + avatar_url text, + last_check bigint NOT NULL +); + + + +CREATE TABLE room_account_data ( + user_id text NOT NULL, + room_id text NOT NULL, + account_data_type text NOT NULL, + stream_id bigint NOT NULL, + content text NOT NULL +); + + + +CREATE TABLE room_alias_servers ( + room_alias text NOT NULL, + server text NOT NULL +); + + + +CREATE TABLE room_aliases ( + room_alias text NOT NULL, + room_id text NOT NULL, + creator text +); + + + +CREATE TABLE room_depth ( + room_id text NOT NULL, + min_depth integer NOT NULL +); + + + +CREATE TABLE room_memberships ( + event_id text NOT NULL, + user_id text NOT NULL, + sender text NOT NULL, + room_id text NOT NULL, + membership text NOT NULL, + forgotten integer DEFAULT 0, + display_name text, + avatar_url text +); + + + +CREATE TABLE room_names ( + event_id text NOT NULL, + room_id text NOT NULL, + name text NOT NULL +); + + + +CREATE TABLE room_state ( + room_id text NOT NULL, + join_rules text, + history_visibility text, + encryption text, + name text, + topic text, + avatar text, + canonical_alias text +); + + + +CREATE TABLE room_stats ( + room_id text NOT NULL, + ts bigint NOT NULL, + bucket_size integer NOT NULL, + current_state_events integer NOT NULL, + joined_members integer NOT NULL, + invited_members integer NOT NULL, + left_members integer NOT NULL, + banned_members integer NOT NULL, + state_events integer NOT NULL +); + + + +CREATE TABLE room_stats_earliest_token ( + room_id text NOT NULL, + token bigint NOT NULL +); + + + +CREATE TABLE room_tags ( + user_id text NOT NULL, + room_id text NOT NULL, + tag text NOT NULL, + content text NOT NULL +); + + + +CREATE TABLE room_tags_revisions ( + user_id text NOT NULL, + room_id text NOT NULL, + stream_id bigint NOT NULL +); + + + +CREATE TABLE rooms ( + room_id text NOT NULL, + is_boolean, + creator text +); + + + +CREATE TABLE schema_version ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + version integer NOT NULL, + upgraded boolean NOT NULL, + CONSTRAINT schema_version_lock_check CHECK ((lock = 'X'::bpchar)) +); + + + +CREATE TABLE server_keys_json ( + server_name text NOT NULL, + key_id text NOT NULL, + from_server text NOT NULL, + ts_added_ms bigint NOT NULL, + ts_valid_until_ms bigint NOT NULL, + key_json bytea NOT NULL +); + + + +CREATE TABLE server_signature_keys ( + server_name text, + key_id text, + from_server text, + ts_added_ms bigint, + verify_key bytea, + ts_valid_until_ms bigint +); + + + +CREATE TABLE state_events ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + prev_state text +); + + + +CREATE TABLE state_group_edges ( + state_group bigint NOT NULL, + prev_state_group bigint NOT NULL +); + + + +CREATE SEQUENCE state_group_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + + +CREATE TABLE state_groups ( + id bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL +); + + + +CREATE TABLE state_groups_state ( + state_group bigint NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + event_id text NOT NULL +); + + + +CREATE TABLE stats_stream_pos ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint, + CONSTRAINT stats_stream_pos_lock_check CHECK ((lock = 'X'::bpchar)) +); + + + +CREATE TABLE stream_ordering_to_exterm ( + stream_ordering bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL +); + + + +CREATE TABLE threepid_guest_access_tokens ( + medium text, + address text, + guest_access_token text, + first_inviter text +); + + + +CREATE TABLE topics ( + event_id text NOT NULL, + room_id text NOT NULL, + topic text NOT NULL +); + + + +CREATE TABLE user_daily_visits ( + user_id text NOT NULL, + device_id text, + "timestamp" bigint NOT NULL +); + + + +CREATE TABLE user_directory ( + user_id text NOT NULL, + room_id text, + display_name text, + avatar_url text +); + + + +CREATE TABLE user_directory_search ( + user_id text NOT NULL, + vector tsvector +); + + + +CREATE TABLE user_directory_stream_pos ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint, + CONSTRAINT user_directory_stream_pos_lock_check CHECK ((lock = 'X'::bpchar)) +); + + + +CREATE TABLE user_filters ( + user_id text, + filter_id bigint, + filter_json bytea +); + + + +CREATE TABLE user_ips ( + user_id text NOT NULL, + access_token text NOT NULL, + device_id text, + ip text NOT NULL, + user_agent text NOT NULL, + last_seen bigint NOT NULL +); + + + +CREATE TABLE user_stats ( + user_id text NOT NULL, + ts bigint NOT NULL, + bucket_size integer NOT NULL, + rooms integer NOT NULL, + private_rooms integer NOT NULL +); + + + +CREATE TABLE user_threepid_id_server ( + user_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + id_server text NOT NULL +); + + + +CREATE TABLE user_threepids ( + user_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + validated_at bigint NOT NULL, + added_at bigint NOT NULL +); + + + +CREATE TABLE users ( + name text, + password_hash text, + creation_ts bigint, + admin smallint DEFAULT 0 NOT NULL, + upgrade_ts bigint, + is_guest smallint DEFAULT 0 NOT NULL, + appservice_id text, + consent_version text, + consent_server_notice_sent text, + user_type text +); + + + +CREATE TABLE users_in_rooms ( + user_id text NOT NULL, + room_id text NOT NULL +); + + + +CREATE TABLE users_pending_deactivation ( + user_id text NOT NULL +); + + + +CREATE TABLE users_who_share_private_rooms ( + user_id text NOT NULL, + other_user_id text NOT NULL, + room_id text NOT NULL +); + + + +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_pkey PRIMARY KEY (id); + + + +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_token_key UNIQUE (token); + + + +ALTER TABLE ONLY account_data + ADD CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type); + + + +ALTER TABLE ONLY account_validity + ADD CONSTRAINT account_validity_pkey PRIMARY KEY (user_id); + + + +ALTER TABLE ONLY application_services_state + ADD CONSTRAINT application_services_state_pkey PRIMARY KEY (as_id); + + + +ALTER TABLE ONLY application_services_txns + ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id); + + + +ALTER TABLE ONLY applied_module_schemas + ADD CONSTRAINT applied_module_schemas_module_name_file_key UNIQUE (module_name, file); + + + +ALTER TABLE ONLY applied_schema_deltas + ADD CONSTRAINT applied_schema_deltas_version_file_key UNIQUE (version, file); + + + +ALTER TABLE ONLY appservice_stream_position + ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock); + + + +ALTER TABLE ONLY background_updates + ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name); + + + +ALTER TABLE ONLY current_state_events + ADD CONSTRAINT current_state_events_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY current_state_events + ADD CONSTRAINT current_state_events_room_id_type_state_key_key UNIQUE (room_id, type, state_key); + + + +ALTER TABLE ONLY destinations + ADD CONSTRAINT destinations_pkey PRIMARY KEY (destination); + + + +ALTER TABLE ONLY devices + ADD CONSTRAINT device_uniqueness UNIQUE (user_id, device_id); + + + +ALTER TABLE ONLY e2e_device_keys_json + ADD CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id); + + + +ALTER TABLE ONLY e2e_one_time_keys_json + ADD CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id); + + + +ALTER TABLE ONLY event_backward_extremities + ADD CONSTRAINT event_backward_extremities_event_id_room_id_key UNIQUE (event_id, room_id); + + + +ALTER TABLE ONLY event_edges + ADD CONSTRAINT event_edges_event_id_prev_event_id_room_id_is_state_key UNIQUE (event_id, prev_event_id, room_id, is_state); + + + +ALTER TABLE ONLY event_forward_extremities + ADD CONSTRAINT event_forward_extremities_event_id_room_id_key UNIQUE (event_id, room_id); + + + +ALTER TABLE ONLY event_push_actions + ADD CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag); + + + +ALTER TABLE ONLY event_json + ADD CONSTRAINT event_json_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY event_push_summary_stream_ordering + ADD CONSTRAINT event_push_summary_stream_ordering_lock_key UNIQUE (lock); + + + +ALTER TABLE ONLY event_reference_hashes + ADD CONSTRAINT event_reference_hashes_event_id_algorithm_key UNIQUE (event_id, algorithm); + + + +ALTER TABLE ONLY event_reports + ADD CONSTRAINT event_reports_pkey PRIMARY KEY (id); + + + +ALTER TABLE ONLY event_to_state_groups + ADD CONSTRAINT event_to_state_groups_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY events + ADD CONSTRAINT events_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY events + ADD CONSTRAINT events_pkey PRIMARY KEY (stream_ordering); + + + +ALTER TABLE ONLY ex_outlier_stream + ADD CONSTRAINT ex_outlier_stream_pkey PRIMARY KEY (event_stream_ordering); + + + +ALTER TABLE ONLY group_roles + ADD CONSTRAINT group_roles_group_id_role_id_key UNIQUE (group_id, role_id); + + + +ALTER TABLE ONLY group_room_categories + ADD CONSTRAINT group_room_categories_group_id_category_id_key UNIQUE (group_id, category_id); + + + +ALTER TABLE ONLY group_summary_roles + ADD CONSTRAINT group_summary_roles_group_id_role_id_role_order_key UNIQUE (group_id, role_id, role_order); + + + +ALTER TABLE ONLY group_summary_room_categories + ADD CONSTRAINT group_summary_room_categories_group_id_category_id_cat_orde_key UNIQUE (group_id, category_id, cat_order); + + + +ALTER TABLE ONLY group_summary_rooms + ADD CONSTRAINT group_summary_rooms_group_id_category_id_room_id_room_order_key UNIQUE (group_id, category_id, room_id, room_order); + + + +ALTER TABLE ONLY guest_access + ADD CONSTRAINT guest_access_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY history_visibility + ADD CONSTRAINT history_visibility_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY local_media_repository + ADD CONSTRAINT local_media_repository_media_id_key UNIQUE (media_id); + + + +ALTER TABLE ONLY local_media_repository_thumbnails + ADD CONSTRAINT local_media_repository_thumbn_media_id_thumbnail_width_thum_key UNIQUE (media_id, thumbnail_width, thumbnail_height, thumbnail_type); + + + +ALTER TABLE ONLY user_threepids + ADD CONSTRAINT medium_address UNIQUE (medium, address); + + + +ALTER TABLE ONLY open_id_tokens + ADD CONSTRAINT open_id_tokens_pkey PRIMARY KEY (token); + + + +ALTER TABLE ONLY presence_allow_inbound + ADD CONSTRAINT presence_allow_inbound_observed_user_id_observer_user_id_key UNIQUE (observed_user_id, observer_user_id); + + + +ALTER TABLE ONLY presence + ADD CONSTRAINT presence_user_id_key UNIQUE (user_id); + + + +ALTER TABLE ONLY account_data_max_stream_id + ADD CONSTRAINT private_user_data_max_stream_id_lock_key UNIQUE (lock); + + + +ALTER TABLE ONLY profiles + ADD CONSTRAINT profiles_user_id_key UNIQUE (user_id); + + + +ALTER TABLE ONLY push_rules_enable + ADD CONSTRAINT push_rules_enable_pkey PRIMARY KEY (id); + + + +ALTER TABLE ONLY push_rules_enable + ADD CONSTRAINT push_rules_enable_user_name_rule_id_key UNIQUE (user_name, rule_id); + + + +ALTER TABLE ONLY push_rules + ADD CONSTRAINT push_rules_pkey PRIMARY KEY (id); + + + +ALTER TABLE ONLY push_rules + ADD CONSTRAINT push_rules_user_name_rule_id_key UNIQUE (user_name, rule_id); + + + +ALTER TABLE ONLY pusher_throttle + ADD CONSTRAINT pusher_throttle_pkey PRIMARY KEY (pusher, room_id); + + + +ALTER TABLE ONLY pushers + ADD CONSTRAINT pushers2_app_id_pushkey_user_name_key UNIQUE (app_id, pushkey, user_name); + + + +ALTER TABLE ONLY pushers + ADD CONSTRAINT pushers2_pkey PRIMARY KEY (id); + + + +ALTER TABLE ONLY receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id); + + + +ALTER TABLE ONLY receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id); + + + +ALTER TABLE ONLY received_transactions + ADD CONSTRAINT received_transactions_transaction_id_origin_key UNIQUE (transaction_id, origin); + + + +ALTER TABLE ONLY redactions + ADD CONSTRAINT redactions_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY rejections + ADD CONSTRAINT rejections_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY remote_media_cache + ADD CONSTRAINT remote_media_cache_media_origin_media_id_key UNIQUE (media_origin, media_id); + + + +ALTER TABLE ONLY remote_media_cache_thumbnails + ADD CONSTRAINT remote_media_cache_thumbnails_media_origin_media_id_thumbna_key UNIQUE (media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type); + + + +ALTER TABLE ONLY room_account_data + ADD CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type); + + + +ALTER TABLE ONLY room_aliases + ADD CONSTRAINT room_aliases_room_alias_key UNIQUE (room_alias); + + + +ALTER TABLE ONLY room_depth + ADD CONSTRAINT room_depth_room_id_key UNIQUE (room_id); + + + +ALTER TABLE ONLY room_memberships + ADD CONSTRAINT room_memberships_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY room_names + ADD CONSTRAINT room_names_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY room_tags_revisions + ADD CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id); + + + +ALTER TABLE ONLY room_tags + ADD CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag); + + + +ALTER TABLE ONLY rooms + ADD CONSTRAINT rooms_pkey PRIMARY KEY (room_id); + + + +ALTER TABLE ONLY schema_version + ADD CONSTRAINT schema_version_lock_key UNIQUE (lock); + + + +ALTER TABLE ONLY server_keys_json + ADD CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server); + + + +ALTER TABLE ONLY server_signature_keys + ADD CONSTRAINT server_signature_keys_server_name_key_id_key UNIQUE (server_name, key_id); + + + +ALTER TABLE ONLY state_events + ADD CONSTRAINT state_events_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY state_groups + ADD CONSTRAINT state_groups_pkey PRIMARY KEY (id); + + + +ALTER TABLE ONLY stats_stream_pos + ADD CONSTRAINT stats_stream_pos_lock_key UNIQUE (lock); + + + +ALTER TABLE ONLY topics + ADD CONSTRAINT topics_event_id_key UNIQUE (event_id); + + + +ALTER TABLE ONLY user_directory_stream_pos + ADD CONSTRAINT user_directory_stream_pos_lock_key UNIQUE (lock); + + + +ALTER TABLE ONLY users + ADD CONSTRAINT users_name_key UNIQUE (name); + + + +CREATE INDEX _extremities_to_check_id ON _extremities_to_check USING btree (event_id); + + + +CREATE INDEX account_data_stream_id ON account_data USING btree (user_id, stream_id); + + + +CREATE INDEX application_services_txns_id ON application_services_txns USING btree (as_id); + + + +CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list USING btree (appservice_id, network_id, room_id); + + + +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms USING btree (room_id); + + + +CREATE INDEX cache_invalidation_stream_id ON cache_invalidation_stream USING btree (stream_id); + + + +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream USING btree (stream_id); + + + +CREATE INDEX deleted_pushers_stream_id ON deleted_pushers USING btree (stream_id); + + + +CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox USING btree (origin, message_id); + + + +CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox USING btree (destination, stream_id); + + + +CREATE INDEX device_federation_outbox_id ON device_federation_outbox USING btree (stream_id); + + + +CREATE INDEX device_inbox_stream_id ON device_inbox USING btree (stream_id); + + + +CREATE INDEX device_inbox_user_stream_id ON device_inbox USING btree (user_id, device_id, stream_id); + + + +CREATE INDEX device_lists_outbound_last_success_idx ON device_lists_outbound_last_success USING btree (destination, user_id, stream_id); + + + +CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes USING btree (destination, stream_id); + + + +CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes USING btree (stream_id); + + + +CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes USING btree (destination, user_id); + + + +CREATE INDEX device_lists_stream_id ON device_lists_stream USING btree (stream_id, user_id); + + + +CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys USING btree (user_id, room_id, session_id); + + + +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions USING btree (user_id, version); + + + +CREATE UNIQUE INDEX erased_users_user ON erased_users USING btree (user_id); + + + +CREATE INDEX ev_b_extrem_id ON event_backward_extremities USING btree (event_id); + + + +CREATE INDEX ev_b_extrem_room ON event_backward_extremities USING btree (room_id); + + + +CREATE INDEX ev_edges_id ON event_edges USING btree (event_id); + + + +CREATE INDEX ev_edges_prev_id ON event_edges USING btree (prev_event_id); + + + +CREATE INDEX ev_extrem_id ON event_forward_extremities USING btree (event_id); + + + +CREATE INDEX ev_extrem_room ON event_forward_extremities USING btree (room_id); + + + +CREATE INDEX evauth_edges_id ON event_auth USING btree (event_id); + + + +CREATE INDEX event_json_room_id ON event_json USING btree (room_id); + + + +CREATE INDEX event_push_actions_rm_tokens ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering); + + + +CREATE INDEX event_push_actions_room_id_user_id ON event_push_actions USING btree (room_id, user_id); + + + +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging USING btree (event_id); + + + +CREATE INDEX event_push_actions_stream_ordering ON event_push_actions USING btree (stream_ordering, user_id); + + + +CREATE INDEX event_push_summary_user_rm ON event_push_summary USING btree (user_id, room_id); + + + +CREATE INDEX event_reference_hashes_id ON event_reference_hashes USING btree (event_id); + + + +CREATE UNIQUE INDEX event_relations_id ON event_relations USING btree (event_id); + + + +CREATE INDEX event_relations_relates ON event_relations USING btree (relates_to_id, relation_type, aggregation_key); + + + +CREATE INDEX event_search_ev_ridx ON event_search USING btree (room_id); + + + +CREATE INDEX event_search_fts_idx ON event_search USING gin (vector); + + + +CREATE INDEX events_order_room ON events USING btree (room_id, topological_ordering, stream_ordering); + + + +CREATE INDEX events_room_stream ON events USING btree (room_id, stream_ordering); + + + +CREATE INDEX events_ts ON events USING btree (origin_server_ts, stream_ordering); + + + +CREATE INDEX group_attestations_remote_g_idx ON group_attestations_remote USING btree (group_id, user_id); + + + +CREATE INDEX group_attestations_remote_u_idx ON group_attestations_remote USING btree (user_id); + + + +CREATE INDEX group_attestations_remote_v_idx ON group_attestations_remote USING btree (valid_until_ms); + + + +CREATE INDEX group_attestations_renewals_g_idx ON group_attestations_renewals USING btree (group_id, user_id); + + + +CREATE INDEX group_attestations_renewals_u_idx ON group_attestations_renewals USING btree (user_id); + + + +CREATE INDEX group_attestations_renewals_v_idx ON group_attestations_renewals USING btree (valid_until_ms); + + + +CREATE UNIQUE INDEX group_invites_g_idx ON group_invites USING btree (group_id, user_id); + + + +CREATE INDEX group_invites_u_idx ON group_invites USING btree (user_id); + + + +CREATE UNIQUE INDEX group_rooms_g_idx ON group_rooms USING btree (group_id, room_id); + + + +CREATE INDEX group_rooms_r_idx ON group_rooms USING btree (room_id); + + + +CREATE UNIQUE INDEX group_summary_rooms_g_idx ON group_summary_rooms USING btree (group_id, room_id, category_id); + + + +CREATE INDEX group_summary_users_g_idx ON group_summary_users USING btree (group_id); + + + +CREATE UNIQUE INDEX group_users_g_idx ON group_users USING btree (group_id, user_id); + + + +CREATE INDEX group_users_u_idx ON group_users USING btree (user_id); + + + +CREATE UNIQUE INDEX groups_idx ON groups USING btree (group_id); + + + +CREATE INDEX local_group_membership_g_idx ON local_group_membership USING btree (group_id); + + + +CREATE INDEX local_group_membership_u_idx ON local_group_membership USING btree (user_id, group_id); + + + +CREATE INDEX local_invites_for_user_idx ON local_invites USING btree (invitee, locally_rejected, replaced_by, room_id); + + + +CREATE INDEX local_invites_id ON local_invites USING btree (stream_id); + + + +CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails USING btree (media_id); + + + +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache USING btree (url, download_ts); + + + +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache USING btree (expires_ts); + + + +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache USING btree (media_id); + + + +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users USING btree ("timestamp"); + + + +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users USING btree (user_id); + + + +CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens USING btree (ts_valid_until_ms); + + + +CREATE INDEX presence_stream_id ON presence_stream USING btree (stream_id, user_id); + + + +CREATE INDEX presence_stream_user_id ON presence_stream USING btree (user_id); + + + +CREATE INDEX room_index ON rooms USING btree (is_; + + + +CREATE INDEX room_list_stream_idx ON room_list_stream USING btree (stream_id); + + + +CREATE INDEX room_list_stream_rm_idx ON room_list_stream USING btree (room_id, stream_id); + + + +CREATE INDEX push_rules_enable_user_name ON push_rules_enable USING btree (user_name); + + + +CREATE INDEX push_rules_stream_id ON push_rules_stream USING btree (stream_id); + + + +CREATE INDEX push_rules_stream_user_stream_id ON push_rules_stream USING btree (user_id, stream_id); + + + +CREATE INDEX push_rules_user_name ON push_rules USING btree (user_name); + + + +CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override USING btree (user_id); + + + +CREATE INDEX receipts_linearized_id ON receipts_linearized USING btree (stream_id); + + + +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized USING btree (room_id, stream_id); + + + +CREATE INDEX receipts_linearized_user ON receipts_linearized USING btree (user_id); + + + +CREATE INDEX received_transactions_ts ON received_transactions USING btree (ts); + + + +CREATE INDEX redactions_redacts ON redactions USING btree (redacts); + + + +CREATE INDEX remote_profile_cache_time ON remote_profile_cache USING btree (last_check); + + + +CREATE UNIQUE INDEX remote_profile_cache_user_id ON remote_profile_cache USING btree (user_id); + + + +CREATE INDEX room_account_data_stream_id ON room_account_data USING btree (user_id, stream_id); + + + +CREATE INDEX room_alias_servers_alias ON room_alias_servers USING btree (room_alias); + + + +CREATE INDEX room_aliases_id ON room_aliases USING btree (room_id); + + + +CREATE INDEX room_depth_room ON room_depth USING btree (room_id); + + + +CREATE INDEX room_memberships_room_id ON room_memberships USING btree (room_id); + + + +CREATE INDEX room_memberships_user_id ON room_memberships USING btree (user_id); + + + +CREATE INDEX room_names_room_id ON room_names USING btree (room_id); + + + +CREATE UNIQUE INDEX room_state_room ON room_state USING btree (room_id); + + + +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token USING btree (room_id); + + + +CREATE UNIQUE INDEX room_stats_room_ts ON room_stats USING btree (room_id, ts); + + + +CREATE INDEX state_group_edges_idx ON state_group_edges USING btree (state_group); + + + +CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_state_group); + + + +CREATE INDEX state_groups_state_id ON state_groups_state USING btree (state_group); + + + +CREATE INDEX stream_ordering_to_exterm_idx ON stream_ordering_to_exterm USING btree (stream_ordering); + + + +CREATE INDEX stream_ordering_to_exterm_rm_idx ON stream_ordering_to_exterm USING btree (room_id, stream_ordering); + + + +CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens USING btree (medium, address); + + + +CREATE INDEX topics_room_id ON topics USING btree (room_id); + + + +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits USING btree ("timestamp"); + + + +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits USING btree (user_id, "timestamp"); + + + +CREATE INDEX user_directory_room_idx ON user_directory USING btree (room_id); + + + +CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin (vector); + + + +CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search USING btree (user_id); + + + +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory USING btree (user_id); + + + +CREATE INDEX user_filters_by_user_id_filter_id ON user_filters USING btree (user_id, filter_id); + + + +CREATE INDEX user_ips_user_ip ON user_ips USING btree (user_id, access_token, ip); + + + +CREATE UNIQUE INDEX user_stats_user_ts ON user_stats USING btree (user_id, ts); + + + +CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server USING btree (user_id, medium, address, id_server); + + + +CREATE INDEX user_threepids_medium_address ON user_threepids USING btree (medium, address); + + + +CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id); + + + +CREATE UNIQUE INDEX users_in_rooms_u_idx ON users_in_rooms USING btree (user_id, room_id); + + + +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms USING btree (other_user_id); + + + +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms USING btree (room_id); + + + +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms USING btree (user_id, other_user_id, room_id); + + + diff --git a/synapse/storage/schema/full_schemas/54/full.sql.sqlite b/synapse/storage/schema/full_schemas/54/full.sql.sqlite new file mode 100644 index 0000000000..0b60a6c789 --- /dev/null +++ b/synapse/storage/schema/full_schemas/54/full.sql.sqlite @@ -0,0 +1,261 @@ +CREATE TABLE application_services_state( as_id TEXT PRIMARY KEY, state VARCHAR(5), last_txn INTEGER ); +CREATE TABLE application_services_txns( as_id TEXT NOT NULL, txn_id INTEGER NOT NULL, event_ids TEXT NOT NULL, UNIQUE(as_id, txn_id) ); +CREATE INDEX application_services_txns_id ON application_services_txns ( as_id ); +CREATE TABLE presence( user_id TEXT NOT NULL, state VARCHAR(20), status_msg TEXT, mtime BIGINT, UNIQUE (user_id) ); +CREATE TABLE presence_allow_inbound( observed_user_id TEXT NOT NULL, observer_user_id TEXT NOT NULL, UNIQUE (observed_user_id, observer_user_id) ); +CREATE TABLE users( name TEXT, password_hash TEXT, creation_ts BIGINT, admin SMALLINT DEFAULT 0 NOT NULL, upgrade_ts BIGINT, is_guest SMALLINT DEFAULT 0 NOT NULL, appservice_id TEXT, consent_version TEXT, consent_server_notice_sent TEXT, user_type TEXT DEFAULT NULL, UNIQUE(name) ); +CREATE TABLE access_tokens( id BIGINT PRIMARY KEY, user_id TEXT NOT NULL, device_id TEXT, token TEXT NOT NULL, last_used BIGINT, UNIQUE(token) ); +CREATE TABLE user_ips ( user_id TEXT NOT NULL, access_token TEXT NOT NULL, device_id TEXT, ip TEXT NOT NULL, user_agent TEXT NOT NULL, last_seen BIGINT NOT NULL ); +CREATE TABLE profiles( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, UNIQUE(user_id) ); +CREATE TABLE received_transactions( transaction_id TEXT, origin TEXT, ts BIGINT, response_code INTEGER, response_json bytea, has_been_referenced smallint default 0, UNIQUE (transaction_id, origin) ); +CREATE TABLE destinations( destination TEXT PRIMARY KEY, retry_last_ts BIGINT, retry_interval INTEGER ); +CREATE TABLE events( stream_ordering INTEGER PRIMARY KEY, topological_ordering BIGINT NOT NULL, event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, content TEXT, unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, depth BIGINT DEFAULT 0 NOT NULL, origin_server_ts BIGINT, received_ts BIGINT, sender TEXT, contains_url BOOLEAN, UNIQUE (event_id) ); +CREATE INDEX events_order_room ON events ( room_id, topological_ordering, stream_ordering ); +CREATE TABLE event_json( event_id TEXT NOT NULL, room_id TEXT NOT NULL, internal_metadata TEXT NOT NULL, json TEXT NOT NULL, format_version INTEGER, UNIQUE (event_id) ); +CREATE INDEX event_json_room_id ON event_json(room_id); +CREATE TABLE state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, prev_state TEXT, UNIQUE (event_id) ); +CREATE TABLE current_state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, UNIQUE (event_id), UNIQUE (room_id, type, state_key) ); +CREATE TABLE room_memberships( event_id TEXT NOT NULL, user_id TEXT NOT NULL, sender TEXT NOT NULL, room_id TEXT NOT NULL, membership TEXT NOT NULL, forgotten INTEGER DEFAULT 0, display_name TEXT, avatar_url TEXT, UNIQUE (event_id) ); +CREATE INDEX room_memberships_room_id ON room_memberships (room_id); +CREATE INDEX room_memberships_user_id ON room_memberships (user_id); +CREATE TABLE topics( event_id TEXT NOT NULL, room_id TEXT NOT NULL, topic TEXT NOT NULL, UNIQUE (event_id) ); +CREATE INDEX topics_room_id ON topics(room_id); +CREATE TABLE room_names( event_id TEXT NOT NULL, room_id TEXT NOT NULL, name TEXT NOT NULL, UNIQUE (event_id) ); +CREATE INDEX room_names_room_id ON room_names(room_id); +CREATE TABLE rooms( room_id TEXT PRIMARY KEY NOT NULL, is_public BOOL, creator TEXT ); +CREATE TABLE server_signature_keys( server_name TEXT, key_id TEXT, from_server TEXT, ts_added_ms BIGINT, verify_key bytea, ts_valid_until_ms BIGINT, UNIQUE (server_name, key_id) ); +CREATE TABLE rejections( event_id TEXT NOT NULL, reason TEXT NOT NULL, last_check TEXT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE push_rules ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, priority_class SMALLINT NOT NULL, priority INTEGER NOT NULL DEFAULT 0, conditions TEXT NOT NULL, actions TEXT NOT NULL, UNIQUE(user_name, rule_id) ); +CREATE INDEX push_rules_user_name on push_rules (user_name); +CREATE TABLE user_filters( user_id TEXT, filter_id BIGINT, filter_json bytea ); +CREATE INDEX user_filters_by_user_id_filter_id ON user_filters( user_id, filter_id ); +CREATE TABLE push_rules_enable ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, enabled SMALLINT, UNIQUE(user_name, rule_id) ); +CREATE INDEX push_rules_enable_user_name on push_rules_enable (user_name); +CREATE TABLE event_forward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); +CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id); +CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id); +CREATE TABLE event_backward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); +CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id); +CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id); +CREATE TABLE event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, is_state BOOL NOT NULL, UNIQUE (event_id, prev_event_id, room_id, is_state) ); +CREATE INDEX ev_edges_id ON event_edges(event_id); +CREATE INDEX ev_edges_prev_id ON event_edges(prev_event_id); +CREATE TABLE room_depth( room_id TEXT NOT NULL, min_depth INTEGER NOT NULL, UNIQUE (room_id) ); +CREATE INDEX room_depth_room ON room_depth(room_id); +CREATE TABLE state_groups( id BIGINT PRIMARY KEY, room_id TEXT NOT NULL, event_id TEXT NOT NULL ); +CREATE TABLE state_groups_state( state_group BIGINT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT NOT NULL ); +CREATE TABLE event_to_state_groups( event_id TEXT NOT NULL, state_group BIGINT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE local_media_repository ( media_id TEXT, media_type TEXT, media_length INTEGER, created_ts BIGINT, upload_name TEXT, user_id TEXT, quarantined_by TEXT, url_cache TEXT, last_access_ts BIGINT, UNIQUE (media_id) ); +CREATE TABLE local_media_repository_thumbnails ( media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_type TEXT, thumbnail_method TEXT, thumbnail_length INTEGER, UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type ) ); +CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails (media_id); +CREATE TABLE remote_media_cache ( media_origin TEXT, media_id TEXT, media_type TEXT, created_ts BIGINT, upload_name TEXT, media_length INTEGER, filesystem_id TEXT, last_access_ts BIGINT, quarantined_by TEXT, UNIQUE (media_origin, media_id) ); +CREATE TABLE remote_media_cache_thumbnails ( media_origin TEXT, media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_method TEXT, thumbnail_type TEXT, thumbnail_length INTEGER, filesystem_id TEXT, UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type ) ); +CREATE TABLE redactions ( event_id TEXT NOT NULL, redacts TEXT NOT NULL, UNIQUE (event_id) ); +CREATE INDEX redactions_redacts ON redactions (redacts); +CREATE TABLE room_aliases( room_alias TEXT NOT NULL, room_id TEXT NOT NULL, creator TEXT, UNIQUE (room_alias) ); +CREATE INDEX room_aliases_id ON room_aliases(room_id); +CREATE TABLE room_alias_servers( room_alias TEXT NOT NULL, server TEXT NOT NULL ); +CREATE INDEX room_alias_servers_alias ON room_alias_servers(room_alias); +CREATE TABLE event_reference_hashes ( event_id TEXT, algorithm TEXT, hash bytea, UNIQUE (event_id, algorithm) ); +CREATE INDEX event_reference_hashes_id ON event_reference_hashes(event_id); +CREATE TABLE IF NOT EXISTS "server_keys_json" ( server_name TEXT NOT NULL, key_id TEXT NOT NULL, from_server TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, ts_valid_until_ms BIGINT NOT NULL, key_json bytea NOT NULL, CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server) ); +CREATE TABLE e2e_device_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id) ); +CREATE TABLE e2e_one_time_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, algorithm TEXT NOT NULL, key_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id) ); +CREATE TABLE receipts_graph( room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, event_ids TEXT NOT NULL, data TEXT NOT NULL, CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id) ); +CREATE TABLE receipts_linearized ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, event_id TEXT NOT NULL, data TEXT NOT NULL, CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id) ); +CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id ); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id ); +CREATE TABLE IF NOT EXISTS "user_threepids" ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, validated_at BIGINT NOT NULL, added_at BIGINT NOT NULL, CONSTRAINT medium_address UNIQUE (medium, address) ); +CREATE INDEX user_threepids_user_id ON user_threepids(user_id); +CREATE TABLE background_updates( update_name TEXT NOT NULL, progress_json TEXT NOT NULL, depends_on TEXT, CONSTRAINT background_updates_uniqueness UNIQUE (update_name) ); +CREATE VIRTUAL TABLE event_search USING fts4 ( event_id, room_id, sender, key, value ) +/* event_search(event_id,room_id,sender,"key",value) */; +CREATE TABLE IF NOT EXISTS 'event_search_content'(docid INTEGER PRIMARY KEY, 'c0event_id', 'c1room_id', 'c2sender', 'c3key', 'c4value'); +CREATE TABLE IF NOT EXISTS 'event_search_segments'(blockid INTEGER PRIMARY KEY, block BLOB); +CREATE TABLE IF NOT EXISTS 'event_search_segdir'(level INTEGER,idx INTEGER,start_block INTEGER,leaves_end_block INTEGER,end_block INTEGER,root BLOB,PRIMARY KEY(level, idx)); +CREATE TABLE IF NOT EXISTS 'event_search_docsize'(docid INTEGER PRIMARY KEY, size BLOB); +CREATE TABLE IF NOT EXISTS 'event_search_stat'(id INTEGER PRIMARY KEY, value BLOB); +CREATE TABLE guest_access( event_id TEXT NOT NULL, room_id TEXT NOT NULL, guest_access TEXT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE history_visibility( event_id TEXT NOT NULL, room_id TEXT NOT NULL, history_visibility TEXT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag TEXT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) ); +CREATE TABLE room_tags_revisions ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, stream_id BIGINT NOT NULL, CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) ); +CREATE TABLE IF NOT EXISTS "account_data_max_stream_id"( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT NOT NULL, CHECK (Lock='X') ); +CREATE TABLE account_data( user_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type) ); +CREATE TABLE room_account_data( user_id TEXT NOT NULL, room_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type) ); +CREATE INDEX account_data_stream_id on account_data(user_id, stream_id); +CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id); +CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering); +CREATE TABLE event_push_actions( room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, profile_tag VARCHAR(32), actions TEXT NOT NULL, topological_ordering BIGINT, stream_ordering BIGINT, notif SMALLINT, highlight SMALLINT, CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) ); +CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id); +CREATE INDEX events_room_stream on events(room_id, stream_ordering); +CREATE INDEX public_room_index on rooms(is_public); +CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id ); +CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering ); +CREATE TABLE presence_stream( stream_id BIGINT, user_id TEXT, state TEXT, last_active_ts BIGINT, last_federation_update_ts BIGINT, last_user_sync_ts BIGINT, status_msg TEXT, currently_active BOOLEAN ); +CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id); +CREATE INDEX presence_stream_user_id ON presence_stream(user_id); +CREATE TABLE push_rules_stream( stream_id BIGINT NOT NULL, event_stream_ordering BIGINT NOT NULL, user_id TEXT NOT NULL, rule_id TEXT NOT NULL, op TEXT NOT NULL, priority_class SMALLINT, priority INTEGER, conditions TEXT, actions TEXT ); +CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id); +CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id); +CREATE TABLE ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, state_group BIGINT NOT NULL ); +CREATE TABLE threepid_guest_access_tokens( medium TEXT, address TEXT, guest_access_token TEXT, first_inviter TEXT ); +CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address); +CREATE TABLE local_invites( stream_id BIGINT NOT NULL, inviter TEXT NOT NULL, invitee TEXT NOT NULL, event_id TEXT NOT NULL, room_id TEXT NOT NULL, locally_rejected TEXT, replaced_by TEXT ); +CREATE INDEX local_invites_id ON local_invites(stream_id); +CREATE INDEX local_invites_for_user_idx ON local_invites(invitee, locally_rejected, replaced_by, room_id); +CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id ); +CREATE TABLE open_id_tokens ( token TEXT NOT NULL PRIMARY KEY, ts_valid_until_ms bigint NOT NULL, user_id TEXT NOT NULL, UNIQUE (token) ); +CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms); +CREATE TABLE pusher_throttle( pusher BIGINT NOT NULL, room_id TEXT NOT NULL, last_sent_ts BIGINT, throttle_ms BIGINT, PRIMARY KEY (pusher, room_id) ); +CREATE TABLE event_reports( id BIGINT NOT NULL PRIMARY KEY, received_ts BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, reason TEXT, content TEXT ); +CREATE TABLE devices ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, display_name TEXT, CONSTRAINT device_uniqueness UNIQUE (user_id, device_id) ); +CREATE TABLE appservice_stream_position( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT, CHECK (Lock='X') ); +CREATE TABLE device_inbox ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, stream_id BIGINT NOT NULL, message_json TEXT NOT NULL ); +CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id); +CREATE INDEX received_transactions_ts ON received_transactions(ts); +CREATE TABLE device_federation_outbox ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, queued_ts BIGINT NOT NULL, messages_json TEXT NOT NULL ); +CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox(destination, stream_id); +CREATE TABLE device_federation_inbox ( origin TEXT NOT NULL, message_id TEXT NOT NULL, received_ts BIGINT NOT NULL ); +CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox(origin, message_id); +CREATE TABLE device_max_stream_id ( stream_id BIGINT NOT NULL ); +CREATE TABLE public_room_list_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, visibility BOOLEAN NOT NULL , appservice_id TEXT, network_id TEXT); +CREATE INDEX public_room_list_stream_idx on public_room_list_stream( stream_id ); +CREATE INDEX public_room_list_stream_rm_idx on public_room_list_stream( room_id, stream_id ); +CREATE TABLE state_group_edges( state_group BIGINT NOT NULL, prev_state_group BIGINT NOT NULL ); +CREATE INDEX state_group_edges_idx ON state_group_edges(state_group); +CREATE INDEX state_group_edges_prev_idx ON state_group_edges(prev_state_group); +CREATE TABLE stream_ordering_to_exterm ( stream_ordering BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL ); +CREATE INDEX stream_ordering_to_exterm_idx on stream_ordering_to_exterm( stream_ordering ); +CREATE INDEX stream_ordering_to_exterm_rm_idx on stream_ordering_to_exterm( room_id, stream_ordering ); +CREATE TABLE IF NOT EXISTS "event_auth"( event_id TEXT NOT NULL, auth_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE INDEX evauth_edges_id ON event_auth(event_id); +CREATE INDEX user_threepids_medium_address on user_threepids (medium, address); +CREATE TABLE appservice_room_list( appservice_id TEXT NOT NULL, network_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list( appservice_id, network_id, room_id ); +CREATE INDEX device_federation_outbox_id ON device_federation_outbox(stream_id); +CREATE TABLE federation_stream_position( type TEXT NOT NULL, stream_id INTEGER NOT NULL ); +CREATE TABLE device_lists_remote_cache ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, content TEXT NOT NULL ); +CREATE TABLE device_lists_remote_extremeties ( user_id TEXT NOT NULL, stream_id TEXT NOT NULL ); +CREATE TABLE device_lists_stream ( stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL ); +CREATE INDEX device_lists_stream_id ON device_lists_stream(stream_id, user_id); +CREATE TABLE device_lists_outbound_pokes ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL, sent BOOLEAN NOT NULL, ts BIGINT NOT NULL ); +CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes(destination, stream_id); +CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes(destination, user_id); +CREATE TABLE event_push_summary ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, notif_count BIGINT NOT NULL, stream_ordering BIGINT NOT NULL ); +CREATE INDEX event_push_summary_user_rm ON event_push_summary(user_id, room_id); +CREATE TABLE event_push_summary_stream_ordering ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT NOT NULL, CHECK (Lock='X') ); +CREATE TABLE IF NOT EXISTS "pushers" ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, access_token BIGINT DEFAULT NULL, profile_tag TEXT NOT NULL, kind TEXT NOT NULL, app_id TEXT NOT NULL, app_display_name TEXT NOT NULL, device_display_name TEXT NOT NULL, pushkey TEXT NOT NULL, ts BIGINT NOT NULL, lang TEXT, data TEXT, last_stream_ordering INTEGER, last_success BIGINT, failing_since BIGINT, UNIQUE (app_id, pushkey, user_name) ); +CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes(stream_id); +CREATE TABLE ratelimit_override ( user_id TEXT NOT NULL, messages_per_second BIGINT, burst_count BIGINT ); +CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override(user_id); +CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT, prev_event_id TEXT ); +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id); +CREATE TABLE device_lists_outbound_last_success ( destination TEXT NOT NULL, user_id TEXT NOT NULL, stream_id BIGINT NOT NULL ); +CREATE INDEX device_lists_outbound_last_success_idx ON device_lists_outbound_last_success( destination, user_id, stream_id ); +CREATE TABLE user_directory_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') ); +CREATE VIRTUAL TABLE user_directory_search USING fts4 ( user_id, value ) +/* user_directory_search(user_id,value) */; +CREATE TABLE IF NOT EXISTS 'user_directory_search_content'(docid INTEGER PRIMARY KEY, 'c0user_id', 'c1value'); +CREATE TABLE IF NOT EXISTS 'user_directory_search_segments'(blockid INTEGER PRIMARY KEY, block BLOB); +CREATE TABLE IF NOT EXISTS 'user_directory_search_segdir'(level INTEGER,idx INTEGER,start_block INTEGER,leaves_end_block INTEGER,end_block INTEGER,root BLOB,PRIMARY KEY(level, idx)); +CREATE TABLE IF NOT EXISTS 'user_directory_search_docsize'(docid INTEGER PRIMARY KEY, size BLOB); +CREATE TABLE IF NOT EXISTS 'user_directory_search_stat'(id INTEGER PRIMARY KEY, value BLOB); +CREATE TABLE blocked_rooms ( room_id TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id); +CREATE TABLE IF NOT EXISTS "local_media_repository_url_cache"( url TEXT, response_code INTEGER, etag TEXT, expires_ts BIGINT, og TEXT, media_id TEXT, download_ts BIGINT ); +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts); +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts); +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id); +CREATE TABLE group_users ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, is_admin BOOLEAN NOT NULL, is_public BOOLEAN NOT NULL ); +CREATE TABLE group_invites ( group_id TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE TABLE group_rooms ( group_id TEXT NOT NULL, room_id TEXT NOT NULL, is_public BOOLEAN NOT NULL ); +CREATE TABLE group_summary_rooms ( group_id TEXT NOT NULL, room_id TEXT NOT NULL, category_id TEXT NOT NULL, room_order BIGINT NOT NULL, is_public BOOLEAN NOT NULL, UNIQUE (group_id, category_id, room_id, room_order), CHECK (room_order > 0) ); +CREATE UNIQUE INDEX group_summary_rooms_g_idx ON group_summary_rooms(group_id, room_id, category_id); +CREATE TABLE group_summary_room_categories ( group_id TEXT NOT NULL, category_id TEXT NOT NULL, cat_order BIGINT NOT NULL, UNIQUE (group_id, category_id, cat_order), CHECK (cat_order > 0) ); +CREATE TABLE group_room_categories ( group_id TEXT NOT NULL, category_id TEXT NOT NULL, profile TEXT NOT NULL, is_public BOOLEAN NOT NULL, UNIQUE (group_id, category_id) ); +CREATE TABLE group_summary_users ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, role_id TEXT NOT NULL, user_order BIGINT NOT NULL, is_public BOOLEAN NOT NULL ); +CREATE INDEX group_summary_users_g_idx ON group_summary_users(group_id); +CREATE TABLE group_summary_roles ( group_id TEXT NOT NULL, role_id TEXT NOT NULL, role_order BIGINT NOT NULL, UNIQUE (group_id, role_id, role_order), CHECK (role_order > 0) ); +CREATE TABLE group_roles ( group_id TEXT NOT NULL, role_id TEXT NOT NULL, profile TEXT NOT NULL, is_public BOOLEAN NOT NULL, UNIQUE (group_id, role_id) ); +CREATE TABLE group_attestations_renewals ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, valid_until_ms BIGINT NOT NULL ); +CREATE INDEX group_attestations_renewals_g_idx ON group_attestations_renewals(group_id, user_id); +CREATE INDEX group_attestations_renewals_u_idx ON group_attestations_renewals(user_id); +CREATE INDEX group_attestations_renewals_v_idx ON group_attestations_renewals(valid_until_ms); +CREATE TABLE group_attestations_remote ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, valid_until_ms BIGINT NOT NULL, attestation_json TEXT NOT NULL ); +CREATE INDEX group_attestations_remote_g_idx ON group_attestations_remote(group_id, user_id); +CREATE INDEX group_attestations_remote_u_idx ON group_attestations_remote(user_id); +CREATE INDEX group_attestations_remote_v_idx ON group_attestations_remote(valid_until_ms); +CREATE TABLE local_group_membership ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, is_admin BOOLEAN NOT NULL, membership TEXT NOT NULL, is_publicised BOOLEAN NOT NULL, content TEXT NOT NULL ); +CREATE INDEX local_group_membership_u_idx ON local_group_membership(user_id, group_id); +CREATE INDEX local_group_membership_g_idx ON local_group_membership(group_id); +CREATE TABLE local_group_updates ( stream_id BIGINT NOT NULL, group_id TEXT NOT NULL, user_id TEXT NOT NULL, type TEXT NOT NULL, content TEXT NOT NULL ); +CREATE TABLE remote_profile_cache ( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, last_check BIGINT NOT NULL ); +CREATE UNIQUE INDEX remote_profile_cache_user_id ON remote_profile_cache(user_id); +CREATE INDEX remote_profile_cache_time ON remote_profile_cache(last_check); +CREATE TABLE IF NOT EXISTS "deleted_pushers" ( stream_id BIGINT NOT NULL, app_id TEXT NOT NULL, pushkey TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id); +CREATE TABLE IF NOT EXISTS "groups" ( group_id TEXT NOT NULL, name TEXT, avatar_url TEXT, short_description TEXT, long_description TEXT, is_public BOOL NOT NULL , join_policy TEXT NOT NULL DEFAULT 'invite'); +CREATE UNIQUE INDEX groups_idx ON groups(group_id); +CREATE TABLE IF NOT EXISTS "user_directory" ( user_id TEXT NOT NULL, room_id TEXT, display_name TEXT, avatar_url TEXT ); +CREATE INDEX user_directory_room_idx ON user_directory(room_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); +CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, actions TEXT NOT NULL, notif SMALLINT NOT NULL, highlight SMALLINT NOT NULL ); +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id); +CREATE TABLE users_pending_deactivation ( user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX group_invites_g_idx ON group_invites(group_id, user_id); +CREATE UNIQUE INDEX group_users_g_idx ON group_users(group_id, user_id); +CREATE INDEX group_users_u_idx ON group_users(user_id); +CREATE INDEX group_invites_u_idx ON group_invites(user_id); +CREATE UNIQUE INDEX group_rooms_g_idx ON group_rooms(group_id, room_id); +CREATE INDEX group_rooms_r_idx ON group_rooms(room_id); +CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, device_id TEXT, timestamp BIGINT NOT NULL ); +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp); +CREATE TABLE erased_users ( user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id); +CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, timestamp BIGINT NOT NULL ); +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); +CREATE TABLE IF NOT EXISTS "e2e_room_keys_versions" ( user_id TEXT NOT NULL, version BIGINT NOT NULL, algorithm TEXT NOT NULL, auth_data TEXT NOT NULL, deleted SMALLINT DEFAULT 0 NOT NULL ); +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version); +CREATE TABLE IF NOT EXISTS "e2e_room_keys" ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, session_id TEXT NOT NULL, version BIGINT NOT NULL, first_message_index INT, forwarded_count INT, is_verified BOOLEAN, session_data TEXT NOT NULL ); +CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id); +CREATE TABLE users_who_share_private_rooms ( user_id TEXT NOT NULL, other_user_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); +CREATE TABLE user_threepid_id_server ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, id_server TEXT NOT NULL ); +CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server( user_id, medium, address, id_server ); +CREATE TABLE users_in_public_rooms ( user_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); +CREATE TABLE account_validity ( user_id TEXT PRIMARY KEY, expiration_ts_ms BIGINT NOT NULL, email_sent BOOLEAN NOT NULL, renewal_token TEXT ); +CREATE TABLE event_relations ( event_id TEXT NOT NULL, relates_to_id TEXT NOT NULL, relation_type TEXT NOT NULL, aggregation_key TEXT ); +CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id); +CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key); +CREATE TABLE stats_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') ); +CREATE TABLE user_stats ( user_id TEXT NOT NULL, ts BIGINT NOT NULL, bucket_size INT NOT NULL, public_rooms INT NOT NULL, private_rooms INT NOT NULL ); +CREATE UNIQUE INDEX user_stats_user_ts ON user_stats(user_id, ts); +CREATE TABLE room_stats ( room_id TEXT NOT NULL, ts BIGINT NOT NULL, bucket_size INT NOT NULL, current_state_events INT NOT NULL, joined_members INT NOT NULL, invited_members INT NOT NULL, left_members INT NOT NULL, banned_members INT NOT NULL, state_events INT NOT NULL ); +CREATE UNIQUE INDEX room_stats_room_ts ON room_stats(room_id, ts); +CREATE TABLE room_state ( room_id TEXT NOT NULL, join_rules TEXT, history_visibility TEXT, encryption TEXT, name TEXT, topic TEXT, avatar TEXT, canonical_alias TEXT ); +CREATE UNIQUE INDEX room_state_room ON room_state(room_id); +CREATE TABLE room_stats_earliest_token ( room_id TEXT NOT NULL, token BIGINT NOT NULL ); +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id); +CREATE INDEX access_tokens_device_id ON access_tokens (user_id, device_id); +CREATE INDEX user_ips_device_id ON user_ips (user_id, device_id, last_seen); +CREATE INDEX event_contains_url_index ON events (room_id, topological_ordering, stream_ordering); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering); +CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering); +CREATE INDEX current_state_events_member_index ON current_state_events (state_key); +CREATE INDEX device_inbox_stream_id_user_id ON device_inbox (stream_id, user_id); +CREATE INDEX device_lists_stream_user_id ON device_lists_stream (user_id, device_id); +CREATE INDEX local_media_repository_url_idx ON local_media_repository (created_ts); +CREATE INDEX user_ips_last_seen ON user_ips (user_id, last_seen); +CREATE INDEX user_ips_last_seen_only ON user_ips (last_seen); +CREATE INDEX users_creation_ts ON users (creation_ts); +CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups (state_group); +CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache (user_id, device_id); +CREATE TABLE sqlite_stat1(tbl,idx,stat); +CREATE INDEX state_groups_state_type_idx ON state_groups_state(state_group, type, state_key); +CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties (user_id); +CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips (user_id, access_token, ip); diff --git a/synapse/storage/schema/full_schemas/README.txt b/synapse/storage/schema/full_schemas/README.txt new file mode 100644 index 0000000000..12d4eb0746 --- /dev/null +++ b/synapse/storage/schema/full_schemas/README.txt @@ -0,0 +1,14 @@ +Building full schema dumps +========================== + +Postgres +-------- + +$ pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner $DATABASE_NAME| sed -e '/^--/d' -e 's/public.//g' -e '/^SET /d' -e '/^SELECT /d' > full.sql.postgres + +SQLite +------ + +$ sqlite3 $DATABASE_FILE ".schema" > full.sql.sqlite + +Delete the CREATE statements for "schema_version", "applied_schema_deltas", and "applied_module_schemas". \ No newline at end of file -- cgit 1.5.1 From 7f81b967ca1d4004832e96513ad33e802f9a6f78 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 3 Jun 2019 22:23:40 +1000 Subject: fix schemas --- synapse/storage/prepare_database.py | 4 +- .../schema/full_schemas/54/full.sql.postgres | 72 +++++----------------- .../storage/schema/full_schemas/54/full.sql.sqlite | 1 - synapse/storage/schema/full_schemas/README.txt | 4 +- tests/handlers/test_user_directory.py | 2 + 5 files changed, 22 insertions(+), 61 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 07478b6672..b81c05369f 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -146,9 +146,9 @@ def _setup_new_database(cur, database_engine): directory_entries = os.listdir(sql_dir) - for filename in fnmatch.filter(directory_entries, "*.sql") + fnmatch.filter( + for filename in sorted(fnmatch.filter(directory_entries, "*.sql") + fnmatch.filter( directory_entries, "*.sql." + specific - ): + )): sql_loc = os.path.join(sql_dir, filename) logger.debug("Applying schema %s", sql_loc) executescript(cur, sql_loc) diff --git a/synapse/storage/schema/full_schemas/54/full.sql.postgres b/synapse/storage/schema/full_schemas/54/full.sql.postgres index 5fb54cfe77..ea3859fd24 100644 --- a/synapse/storage/schema/full_schemas/54/full.sql.postgres +++ b/synapse/storage/schema/full_schemas/54/full.sql.postgres @@ -60,21 +60,6 @@ CREATE TABLE application_services_txns ( ); - -CREATE TABLE applied_module_schemas ( - module_name text NOT NULL, - file text NOT NULL -); - - - -CREATE TABLE applied_schema_deltas ( - version integer NOT NULL, - file text NOT NULL -); - - - CREATE TABLE appservice_room_list ( appservice_id text NOT NULL, network_id text NOT NULL, @@ -475,7 +460,7 @@ CREATE TABLE group_roles ( group_id text NOT NULL, role_id text NOT NULL, profile text NOT NULL, - is_boolean NOT NULL + is_public boolean NOT NULL ); @@ -484,7 +469,7 @@ CREATE TABLE group_room_categories ( group_id text NOT NULL, category_id text NOT NULL, profile text NOT NULL, - is_boolean NOT NULL + is_public boolean NOT NULL ); @@ -492,7 +477,7 @@ CREATE TABLE group_room_categories ( CREATE TABLE group_rooms ( group_id text NOT NULL, room_id text NOT NULL, - is_boolean NOT NULL + is_public boolean NOT NULL ); @@ -520,7 +505,7 @@ CREATE TABLE group_summary_rooms ( room_id text NOT NULL, category_id text NOT NULL, room_order bigint NOT NULL, - is_boolean NOT NULL, + is_public boolean NOT NULL, CONSTRAINT group_summary_rooms_room_order_check CHECK ((room_order > 0)) ); @@ -531,7 +516,7 @@ CREATE TABLE group_summary_users ( user_id text NOT NULL, role_id text NOT NULL, user_order bigint NOT NULL, - is_boolean NOT NULL + is_public boolean NOT NULL ); @@ -540,7 +525,7 @@ CREATE TABLE group_users ( group_id text NOT NULL, user_id text NOT NULL, is_admin boolean NOT NULL, - is_boolean NOT NULL + is_public boolean NOT NULL ); @@ -551,7 +536,7 @@ CREATE TABLE groups ( avatar_url text, short_description text, long_description text, - is_boolean NOT NULL, + is_public boolean NOT NULL, join_policy text DEFAULT 'invite'::text NOT NULL ); @@ -578,7 +563,7 @@ CREATE TABLE local_group_membership ( user_id text NOT NULL, is_admin boolean NOT NULL, membership text NOT NULL, - is_sed boolean NOT NULL, + is_publicised boolean NOT NULL, content text NOT NULL ); @@ -695,7 +680,7 @@ CREATE TABLE profiles ( -CREATE TABLE room_list_stream ( +CREATE TABLE public_room_list_stream ( stream_id bigint NOT NULL, room_id text NOT NULL, visibility boolean NOT NULL, @@ -966,21 +951,12 @@ CREATE TABLE room_tags_revisions ( CREATE TABLE rooms ( room_id text NOT NULL, - is_boolean, + is_public boolean, creator text ); -CREATE TABLE schema_version ( - lock character(1) DEFAULT 'X'::bpchar NOT NULL, - version integer NOT NULL, - upgraded boolean NOT NULL, - CONSTRAINT schema_version_lock_check CHECK ((lock = 'X'::bpchar)) -); - - - CREATE TABLE server_keys_json ( server_name text NOT NULL, key_id text NOT NULL, @@ -1135,7 +1111,7 @@ CREATE TABLE user_stats ( user_id text NOT NULL, ts bigint NOT NULL, bucket_size integer NOT NULL, - rooms integer NOT NULL, + public_rooms integer NOT NULL, private_rooms integer NOT NULL ); @@ -1175,7 +1151,7 @@ CREATE TABLE users ( -CREATE TABLE users_in_rooms ( +CREATE TABLE users_in_public_rooms ( user_id text NOT NULL, room_id text NOT NULL ); @@ -1225,17 +1201,6 @@ ALTER TABLE ONLY application_services_txns ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id); - -ALTER TABLE ONLY applied_module_schemas - ADD CONSTRAINT applied_module_schemas_module_name_file_key UNIQUE (module_name, file); - - - -ALTER TABLE ONLY applied_schema_deltas - ADD CONSTRAINT applied_schema_deltas_version_file_key UNIQUE (version, file); - - - ALTER TABLE ONLY appservice_stream_position ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock); @@ -1521,11 +1486,6 @@ ALTER TABLE ONLY rooms -ALTER TABLE ONLY schema_version - ADD CONSTRAINT schema_version_lock_key UNIQUE (lock); - - - ALTER TABLE ONLY server_keys_json ADD CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server); @@ -1846,15 +1806,15 @@ CREATE INDEX presence_stream_user_id ON presence_stream USING btree (user_id); -CREATE INDEX room_index ON rooms USING btree (is_; +CREATE INDEX public_room_index ON rooms USING btree (is_public); -CREATE INDEX room_list_stream_idx ON room_list_stream USING btree (stream_id); +CREATE INDEX public_room_list_stream_idx ON public_room_list_stream USING btree (stream_id); -CREATE INDEX room_list_stream_rm_idx ON room_list_stream USING btree (room_id, stream_id); +CREATE INDEX public_room_list_stream_rm_idx ON public_room_list_stream USING btree (room_id, stream_id); @@ -2022,7 +1982,7 @@ CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id); -CREATE UNIQUE INDEX users_in_rooms_u_idx ON users_in_rooms USING btree (user_id, room_id); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms USING btree (user_id, room_id); diff --git a/synapse/storage/schema/full_schemas/54/full.sql.sqlite b/synapse/storage/schema/full_schemas/54/full.sql.sqlite index 0b60a6c789..be9295e4c9 100644 --- a/synapse/storage/schema/full_schemas/54/full.sql.sqlite +++ b/synapse/storage/schema/full_schemas/54/full.sql.sqlite @@ -255,7 +255,6 @@ CREATE INDEX user_ips_last_seen_only ON user_ips (last_seen); CREATE INDEX users_creation_ts ON users (creation_ts); CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups (state_group); CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache (user_id, device_id); -CREATE TABLE sqlite_stat1(tbl,idx,stat); CREATE INDEX state_groups_state_type_idx ON state_groups_state(state_group, type, state_key); CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties (user_id); CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips (user_id, access_token, ip); diff --git a/synapse/storage/schema/full_schemas/README.txt b/synapse/storage/schema/full_schemas/README.txt index 12d4eb0746..df49f9b39e 100644 --- a/synapse/storage/schema/full_schemas/README.txt +++ b/synapse/storage/schema/full_schemas/README.txt @@ -4,11 +4,11 @@ Building full schema dumps Postgres -------- -$ pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner $DATABASE_NAME| sed -e '/^--/d' -e 's/public.//g' -e '/^SET /d' -e '/^SELECT /d' > full.sql.postgres +$ pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner $DATABASE_NAME| sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > full.sql.postgres SQLite ------ $ sqlite3 $DATABASE_FILE ".schema" > full.sql.sqlite -Delete the CREATE statements for "schema_version", "applied_schema_deltas", and "applied_module_schemas". \ No newline at end of file +Delete the CREATE statements for "sqlite_stat1", "schema_version", "applied_schema_deltas", and "applied_module_schemas". \ No newline at end of file diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 9021e647fe..b919694f54 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -96,6 +96,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(self.handler.handle_user_deactivated(r_user_id)) self.store.remove_from_user_dir.called_once_with(r_user_id) + @unittest.DEBUG def test_private_room(self): """ A user can be searched for only by people that are either in a public @@ -340,6 +341,7 @@ class TestUserDirSearchDisabled(unittest.HomeserverTestCase): return hs + @unittest.DEBUG def test_disabling_room_list(self): self.config.user_directory_search_enabled = True -- cgit 1.5.1 From be452fc9ace4f501398be769766e3f8bbd798571 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 3 Jun 2019 22:24:23 +1000 Subject: more fix --- .../schema/full_schemas/54/stream_positions.sql | 38 ++++++++++++++++++++++ tests/handlers/test_user_directory.py | 2 -- 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 synapse/storage/schema/full_schemas/54/stream_positions.sql (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/stream_positions.sql b/synapse/storage/schema/full_schemas/54/stream_positions.sql new file mode 100644 index 0000000000..d6433a5af2 --- /dev/null +++ b/synapse/storage/schema/full_schemas/54/stream_positions.sql @@ -0,0 +1,38 @@ + +INSERT INTO appservice_stream_position (stream_ordering) SELECT COALESCE(MAX(stream_ordering), 0) FROM events; +INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', -1); +INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events; +INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); +INSERT INTO stats_stream_pos (stream_id) VALUES (null); + +--- User dir population + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_user_directory_createtables', '{}'); + +-- Run through each room and update the user directory according to who is in it +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); + +-- Insert all users, if search_all_users is on +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); + +--- Stats population + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_stats_createtables', '{}'); + +-- Run through each room and update stats +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_rooms', '{}', 'populate_stats_createtables'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_cleanup', '{}', 'populate_stats_process_rooms'); diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index b919694f54..9021e647fe 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -96,7 +96,6 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(self.handler.handle_user_deactivated(r_user_id)) self.store.remove_from_user_dir.called_once_with(r_user_id) - @unittest.DEBUG def test_private_room(self): """ A user can be searched for only by people that are either in a public @@ -341,7 +340,6 @@ class TestUserDirSearchDisabled(unittest.HomeserverTestCase): return hs - @unittest.DEBUG def test_disabling_room_list(self): self.config.user_directory_search_enabled = True -- cgit 1.5.1 From 4e75c5e02a6dead87e9a24cbdb8fb015221070ce Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 3 Jun 2019 22:42:12 +1000 Subject: WHY IS THIS CALLED A SLIGHTLY DIFFERENT THING --- synapse/storage/schema/full_schemas/54/stream_positions.sql | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/stream_positions.sql b/synapse/storage/schema/full_schemas/54/stream_positions.sql index d6433a5af2..0febedcc5e 100644 --- a/synapse/storage/schema/full_schemas/54/stream_positions.sql +++ b/synapse/storage/schema/full_schemas/54/stream_positions.sql @@ -4,6 +4,7 @@ INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', - INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events; INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); INSERT INTO stats_stream_pos (stream_id) VALUES (null); +INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0); --- User dir population -- cgit 1.5.1 From 2198b7ce2a1316dfaf8dde061c1a6f30a818ed5a Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 4 Jun 2019 01:06:00 +1000 Subject: add stuff in bg updates --- .../schema/full_schemas/54/full.sql.postgres | 72 +++++++++++++++++++--- 1 file changed, 62 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/full.sql.postgres b/synapse/storage/schema/full_schemas/54/full.sql.postgres index ea3859fd24..098434356f 100644 --- a/synapse/storage/schema/full_schemas/54/full.sql.postgres +++ b/synapse/storage/schema/full_schemas/54/full.sql.postgres @@ -3,12 +3,6 @@ -CREATE TABLE _extremities_to_check ( - event_id text -); - - - CREATE TABLE access_tokens ( id bigint NOT NULL, user_id text NOT NULL, @@ -60,6 +54,7 @@ CREATE TABLE application_services_txns ( ); + CREATE TABLE appservice_room_list ( appservice_id text NOT NULL, network_id text NOT NULL, @@ -1201,6 +1196,7 @@ ALTER TABLE ONLY application_services_txns ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id); + ALTER TABLE ONLY appservice_stream_position ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock); @@ -1526,7 +1522,7 @@ ALTER TABLE ONLY users -CREATE INDEX _extremities_to_check_id ON _extremities_to_check USING btree (event_id); +CREATE INDEX access_tokens_device_id ON access_tokens USING btree (user_id, device_id); @@ -1554,6 +1550,10 @@ CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream USING +CREATE INDEX current_state_events_member_index ON current_state_events USING btree (state_key) WHERE (type = 'm.room.member'::text); + + + CREATE INDEX deleted_pushers_stream_id ON deleted_pushers USING btree (stream_id); @@ -1570,7 +1570,7 @@ CREATE INDEX device_federation_outbox_id ON device_federation_outbox USING btree -CREATE INDEX device_inbox_stream_id ON device_inbox USING btree (stream_id); +CREATE INDEX device_inbox_stream_id_user_id ON device_inbox USING btree (stream_id, user_id); @@ -1594,10 +1594,22 @@ CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes USI +CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache USING btree (user_id, device_id); + + + +CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties USING btree (user_id); + + + CREATE INDEX device_lists_stream_id ON device_lists_stream USING btree (stream_id, user_id); +CREATE INDEX device_lists_stream_user_id ON device_lists_stream USING btree (user_id, device_id); + + + CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys USING btree (user_id, room_id, session_id); @@ -1638,10 +1650,18 @@ CREATE INDEX evauth_edges_id ON event_auth USING btree (event_id); +CREATE INDEX event_contains_url_index ON events USING btree (room_id, topological_ordering, stream_ordering) WHERE ((contains_url = true) AND (outlier = false)); + + + CREATE INDEX event_json_room_id ON event_json USING btree (room_id); +CREATE INDEX event_push_actions_highlights_index ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering) WHERE (highlight = 1); + + + CREATE INDEX event_push_actions_rm_tokens ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering); @@ -1658,6 +1678,10 @@ CREATE INDEX event_push_actions_stream_ordering ON event_push_actions USING btre +CREATE INDEX event_push_actions_u_highlight ON event_push_actions USING btree (user_id, stream_ordering); + + + CREATE INDEX event_push_summary_user_rm ON event_push_summary USING btree (user_id, room_id); @@ -1678,10 +1702,18 @@ CREATE INDEX event_search_ev_ridx ON event_search USING btree (room_id); +CREATE UNIQUE INDEX event_search_event_id_idx ON event_search USING btree (event_id); + + + CREATE INDEX event_search_fts_idx ON event_search USING gin (vector); +CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups USING btree (state_group); + + + CREATE INDEX events_order_room ON events USING btree (room_id, topological_ordering, stream_ordering); @@ -1786,6 +1818,10 @@ CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repositor +CREATE INDEX local_media_repository_url_idx ON local_media_repository USING btree (created_ts) WHERE (url_cache IS NOT NULL); + + + CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users USING btree ("timestamp"); @@ -1914,7 +1950,7 @@ CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_s -CREATE INDEX state_groups_state_id ON state_groups_state USING btree (state_group); +CREATE INDEX state_groups_state_type_idx ON state_groups_state USING btree (state_group, type, state_key); @@ -1962,7 +1998,19 @@ CREATE INDEX user_filters_by_user_id_filter_id ON user_filters USING btree (user -CREATE INDEX user_ips_user_ip ON user_ips USING btree (user_id, access_token, ip); +CREATE INDEX user_ips_device_id ON user_ips USING btree (user_id, device_id, last_seen); + + + +CREATE INDEX user_ips_last_seen ON user_ips USING btree (user_id, last_seen); + + + +CREATE INDEX user_ips_last_seen_only ON user_ips USING btree (last_seen); + + + +CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips USING btree (user_id, access_token, ip); @@ -1982,6 +2030,10 @@ CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id); +CREATE INDEX users_creation_ts ON users USING btree (creation_ts); + + + CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms USING btree (user_id, room_id); -- cgit 1.5.1 From fa4b54aca57bebc94e2b763abdae79343a08f969 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Jun 2019 17:06:54 +0100 Subject: Ignore room state with null bytes in for room stats --- synapse/storage/stats.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 1c0b183a56..1f39ef211a 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -328,6 +328,21 @@ class StatsStore(StateDeltasStore): room_id (str) fields (dict[str:Any]) """ + + # For whatever reason some of the fields may contain null bytes, which + # postgres isn't a fan of, so we replace those fields with null. + for col in ( + "join_rules", + "history_visibility", + "encryption", + "name", + "topic", + "avatar", + "canonical_alias" + ): + if "\0" in fields.get(col, ""): + fields[col] = None + return self._simple_upsert( table="room_state", keyvalues={"room_id": room_id}, -- cgit 1.5.1 From 0a56966f7d4879f9d517c96a3c714accdce4e17f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Jun 2019 17:42:52 +0100 Subject: Fix --- synapse/storage/stats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 1f39ef211a..ff266b09b0 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -340,7 +340,8 @@ class StatsStore(StateDeltasStore): "avatar", "canonical_alias" ): - if "\0" in fields.get(col, ""): + field = fields.get(col) + if field and "\0" in field: fields[col] = None return self._simple_upsert( -- cgit 1.5.1 From 75538813fcd0403ec8915484a813b99e6eb256c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Jun 2019 15:45:46 +0100 Subject: Fix background updates to handle redactions/rejections (#5352) * Fix background updates to handle redactions/rejections In background updates based on current state delta stream we need to handle that we may not have all the events (or at least that `get_events` may raise an exception). --- changelog.d/5352.bugfix | 1 + synapse/handlers/presence.py | 11 ++++--- synapse/handlers/stats.py | 18 ++++++++---- synapse/storage/events_worker.py | 37 ++++++++++++++++++++++++ tests/handlers/test_stats.py | 62 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 changelog.d/5352.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5352.bugfix b/changelog.d/5352.bugfix new file mode 100644 index 0000000000..2ffefe5a68 --- /dev/null +++ b/changelog.d/5352.bugfix @@ -0,0 +1 @@ +Fix room stats and presence background updates to correctly handle missing events. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 6209858bbb..e49c8203ef 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -828,14 +828,17 @@ class PresenceHandler(object): # joins. continue - event = yield self.store.get_event(event_id) - if event.content.get("membership") != Membership.JOIN: + event = yield self.store.get_event(event_id, allow_none=True) + if not event or event.content.get("membership") != Membership.JOIN: # We only care about joins continue if prev_event_id: - prev_event = yield self.store.get_event(prev_event_id) - if prev_event.content.get("membership") == Membership.JOIN: + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + if ( + prev_event + and prev_event.content.get("membership") == Membership.JOIN + ): # Ignore changes to join events. continue diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 0e92b405ba..7ad16c8566 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -115,6 +115,7 @@ class StatsHandler(StateDeltasHandler): event_id = delta["event_id"] stream_id = delta["stream_id"] prev_event_id = delta["prev_event_id"] + stream_pos = delta["stream_id"] logger.debug("Handling: %r %r, %s", typ, state_key, event_id) @@ -136,10 +137,15 @@ class StatsHandler(StateDeltasHandler): event_content = {} if event_id is not None: - event_content = (yield self.store.get_event(event_id)).content or {} + event = yield self.store.get_event(event_id, allow_none=True) + if event: + event_content = event.content or {} + + # We use stream_pos here rather than fetch by event_id as event_id + # may be None + now = yield self.store.get_received_ts_by_stream_pos(stream_pos) # quantise time to the nearest bucket - now = yield self.store.get_received_ts(event_id) now = (now // 1000 // self.stats_bucket_size) * self.stats_bucket_size if typ == EventTypes.Member: @@ -149,9 +155,11 @@ class StatsHandler(StateDeltasHandler): # compare them. prev_event_content = {} if prev_event_id is not None: - prev_event_content = ( - yield self.store.get_event(prev_event_id) - ).content + prev_event = yield self.store.get_event( + prev_event_id, allow_none=True, + ) + if prev_event: + prev_event_content = prev_event.content membership = event_content.get("membership", Membership.LEAVE) prev_membership = prev_event_content.get("membership", Membership.LEAVE) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 1782428048..cc7df5cf14 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -78,6 +78,43 @@ class EventsWorkerStore(SQLBaseStore): desc="get_received_ts", ) + def get_received_ts_by_stream_pos(self, stream_ordering): + """Given a stream ordering get an approximate timestamp of when it + happened. + + This is done by simply taking the received ts of the first event that + has a stream ordering greater than or equal to the given stream pos. + If none exists returns the current time, on the assumption that it must + have happened recently. + + Args: + stream_ordering (int) + + Returns: + Deferred[int] + """ + + def _get_approximate_received_ts_txn(txn): + sql = """ + SELECT received_ts FROM events + WHERE stream_ordering >= ? + LIMIT 1 + """ + + txn.execute(sql, (stream_ordering,)) + row = txn.fetchone() + if row and row[0]: + ts = row[0] + else: + ts = self.clock.time_msec() + + return ts + + return self.runInteraction( + "get_approximate_received_ts", + _get_approximate_received_ts_txn, + ) + @defer.inlineCallbacks def get_event( self, diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index 249aba3d59..2710c991cf 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -204,7 +204,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): "a2": {"membership": "not a real thing"}, } - def get_event(event_id): + def get_event(event_id, allow_none=True): m = Mock() m.content = events[event_id] d = defer.Deferred() @@ -224,7 +224,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): "room_id": "room", "event_id": "a1", "prev_event_id": "a2", - "stream_id": "bleb", + "stream_id": 60, } ] @@ -241,7 +241,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): "room_id": "room", "event_id": "a2", "prev_event_id": "a1", - "stream_id": "bleb", + "stream_id": 100, } ] @@ -249,3 +249,59 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.assertEqual( f.value.args[0], "'not a real thing' is not a valid membership" ) + + def test_redacted_prev_event(self): + """ + If the prev_event does not exist, then it is assumed to be a LEAVE. + """ + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + + # Do the initial population of the user directory via the background update + self._add_background_updates() + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + events = { + "a1": None, + "a2": {"membership": Membership.JOIN}, + } + + def get_event(event_id, allow_none=True): + if events.get(event_id): + m = Mock() + m.content = events[event_id] + else: + m = None + d = defer.Deferred() + self.reactor.callLater(0.0, d.callback, m) + return d + + def get_received_ts(event_id): + return defer.succeed(1) + + self.store.get_received_ts = get_received_ts + self.store.get_event = get_event + + deltas = [ + { + "type": EventTypes.Member, + "state_key": "some_user:test", + "room_id": room_1, + "event_id": "a2", + "prev_event_id": "a1", + "stream_id": 100, + } + ] + + # Handle our fake deltas, which has a user going from LEAVE -> JOIN. + self.get_success(self.handler._handle_deltas(deltas)) + + # One delta, with two joined members -- the room creator, and our fake + # user. + r = self.get_success(self.store.get_deltas_for_room(room_1, 0)) + self.assertEqual(len(r), 1) + self.assertEqual(r[0]["joined_members"], 2) -- cgit 1.5.1 From 6362e3af14cff28bc51d0e66d207b84bae7fd422 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 6 Jun 2019 04:20:35 +1000 Subject: add more comments --- synapse/storage/schema/full_schemas/README.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/README.txt b/synapse/storage/schema/full_schemas/README.txt index df49f9b39e..d3f6401344 100644 --- a/synapse/storage/schema/full_schemas/README.txt +++ b/synapse/storage/schema/full_schemas/README.txt @@ -1,6 +1,8 @@ Building full schema dumps ========================== +These schemas need to be made from a database that has had all background updates run. + Postgres -------- @@ -11,4 +13,7 @@ SQLite $ sqlite3 $DATABASE_FILE ".schema" > full.sql.sqlite +After +----- + Delete the CREATE statements for "sqlite_stat1", "schema_version", "applied_schema_deltas", and "applied_module_schemas". \ No newline at end of file -- cgit 1.5.1 From 7f08a3523a2018b66cd96cccf30602c8f687b495 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 6 Jun 2019 11:09:38 +0100 Subject: Better words --- synapse/storage/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 0b5f5f9663..6f7f65d96b 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -592,7 +592,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) def get_max_topological_token(self, room_id, stream_key): - """Get the max topological token in a room that before given stream + """Get the max topological token in a room before the given stream ordering. Args: -- cgit 1.5.1 From 3b6645d3bf3d3be449e1162e4135b677a1086ade Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 7 Jun 2019 01:20:58 +1000 Subject: remove background updates that arent needed --- .../schema/full_schemas/54/stream_positions.sql | 34 +--------------------- 1 file changed, 1 insertion(+), 33 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/stream_positions.sql b/synapse/storage/schema/full_schemas/54/stream_positions.sql index 0febedcc5e..084a70db65 100644 --- a/synapse/storage/schema/full_schemas/54/stream_positions.sql +++ b/synapse/storage/schema/full_schemas/54/stream_positions.sql @@ -4,36 +4,4 @@ INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', - INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events; INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); INSERT INTO stats_stream_pos (stream_id) VALUES (null); -INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0); - ---- User dir population - --- Set up staging tables -INSERT INTO background_updates (update_name, progress_json) VALUES - ('populate_user_directory_createtables', '{}'); - --- Run through each room and update the user directory according to who is in it -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); - --- Insert all users, if search_all_users is on -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'); - --- Clean up staging tables -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); - ---- Stats population - --- Set up staging tables -INSERT INTO background_updates (update_name, progress_json) VALUES - ('populate_stats_createtables', '{}'); - --- Run through each room and update stats -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_stats_process_rooms', '{}', 'populate_stats_createtables'); - --- Clean up staging tables -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_stats_cleanup', '{}', 'populate_stats_process_rooms'); +INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0); \ No newline at end of file -- cgit 1.5.1 From 837e32ef551b9c53d23615cdde56cc9babcc9059 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 7 Jun 2019 01:49:25 +1000 Subject: just user dir? --- .../schema/full_schemas/54/stream_positions.sql | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/stream_positions.sql b/synapse/storage/schema/full_schemas/54/stream_positions.sql index 084a70db65..575ab6b354 100644 --- a/synapse/storage/schema/full_schemas/54/stream_positions.sql +++ b/synapse/storage/schema/full_schemas/54/stream_positions.sql @@ -4,4 +4,22 @@ INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', - INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events; INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); INSERT INTO stats_stream_pos (stream_id) VALUES (null); -INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0); \ No newline at end of file +INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0); + +--- User dir population + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_user_directory_createtables', '{}'); + +-- Run through each room and update the user directory according to who is in it +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); + +-- Insert all users, if search_all_users is on +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); -- cgit 1.5.1 From 3719680ee42b72b8480fa76a1455576897b65ef0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 6 Jun 2019 17:34:07 +0100 Subject: Add ability to perform password reset via email without trusting the identity server (#5377) Sends password reset emails from the homeserver instead of proxying to the identity server. This is now the default behaviour for security reasons. If you wish to continue proxying password reset requests to the identity server you must now enable the email.trust_identity_server_for_password_resets option. This PR is a culmination of 3 smaller PRs which have each been separately reviewed: * #5308 * #5345 * #5368 --- changelog.d/5377.feature | 1 + docs/sample_config.yaml | 60 ++++- synapse/api/errors.py | 9 + synapse/app/homeserver.py | 1 + synapse/config/emailconfig.py | 153 +++++++++-- synapse/handlers/auth.py | 64 ++++- synapse/handlers/identity.py | 13 +- synapse/push/mailer.py | 85 ++++-- synapse/push/pusher.py | 4 +- synapse/python_dependencies.py | 2 +- synapse/res/templates/password_reset.html | 9 + synapse/res/templates/password_reset.txt | 7 + synapse/res/templates/password_reset_failure.html | 6 + synapse/res/templates/password_reset_success.html | 6 + synapse/rest/client/v2_alpha/account.py | 243 ++++++++++++++++- synapse/storage/_base.py | 6 +- synapse/storage/prepare_database.py | 2 +- synapse/storage/registration.py | 290 ++++++++++++++++++++- .../schema/delta/55/track_threepid_validations.sql | 31 +++ tests/utils.py | 1 - 20 files changed, 922 insertions(+), 71 deletions(-) create mode 100644 changelog.d/5377.feature create mode 100644 synapse/res/templates/password_reset.html create mode 100644 synapse/res/templates/password_reset.txt create mode 100644 synapse/res/templates/password_reset_failure.html create mode 100644 synapse/res/templates/password_reset_success.html create mode 100644 synapse/storage/schema/delta/55/track_threepid_validations.sql (limited to 'synapse/storage') diff --git a/changelog.d/5377.feature b/changelog.d/5377.feature new file mode 100644 index 0000000000..6aae41847a --- /dev/null +++ b/changelog.d/5377.feature @@ -0,0 +1 @@ +Add ability to perform password reset via email without trusting the identity server. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index a2e815ea52..ea73306fb9 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1065,10 +1065,8 @@ password_config: -# Enable sending emails for notification events or expiry notices -# Defining a custom URL for Riot is only needed if email notifications -# should contain links to a self-hosted installation of Riot; when set -# the "app_name" setting is ignored. +# Enable sending emails for password resets, notification events or +# account expiry notices # # If your SMTP server requires authentication, the optional smtp_user & # smtp_pass variables should be used @@ -1076,22 +1074,64 @@ password_config: #email: # enable_notifs: false # smtp_host: "localhost" -# smtp_port: 25 +# smtp_port: 25 # SSL: 465, STARTTLS: 587 # smtp_user: "exampleusername" # smtp_pass: "examplepassword" # require_transport_security: False # notif_from: "Your Friendly %(app)s Home Server " # app_name: Matrix -# # if template_dir is unset, uses the example templates that are part of -# # the Synapse distribution. +# +# # Enable email notifications by default +# notif_for_new_users: True +# +# # Defining a custom URL for Riot is only needed if email notifications +# # should contain links to a self-hosted installation of Riot; when set +# # the "app_name" setting is ignored +# riot_base_url: "http://localhost/riot" +# +# # Enable sending password reset emails via the configured, trusted +# # identity servers +# # +# # IMPORTANT! This will give a malicious or overtaken identity server +# # the ability to reset passwords for your users! Make absolutely sure +# # that you want to do this! It is strongly recommended that password +# # reset emails be sent by the homeserver instead +# # +# # If this option is set to false and SMTP options have not been +# # configured, resetting user passwords via email will be disabled +# #trust_identity_server_for_password_resets: false +# +# # Configure the time that a validation email or text message code +# # will expire after sending +# # +# # This is currently used for password resets +# #validation_token_lifetime: 1h +# +# # Template directory. All template files should be stored within this +# # directory +# # # #template_dir: res/templates +# +# # Templates for email notifications +# # # notif_template_html: notif_mail.html # notif_template_text: notif_mail.txt -# # Templates for account expiry notices. +# +# # Templates for account expiry notices +# # # expiry_template_html: notice_expiry.html # expiry_template_text: notice_expiry.txt -# notif_for_new_users: True -# riot_base_url: "http://localhost/riot" +# +# # Templates for password reset emails sent by the homeserver +# # +# #password_reset_template_html: password_reset.html +# #password_reset_template_text: password_reset.txt +# +# # Templates for password reset success and failure pages that a user +# # will see after attempting to reset their password +# # +# #password_reset_template_success_html: password_reset_success.html +# #password_reset_template_failure_html: password_reset_failure.html #password_providers: diff --git a/synapse/api/errors.py b/synapse/api/errors.py index e91697049c..66201d6efe 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -339,6 +339,15 @@ class UnsupportedRoomVersionError(SynapseError): ) +class ThreepidValidationError(SynapseError): + """An error raised when there was a problem authorising an event.""" + + def __init__(self, *args, **kwargs): + if "errcode" not in kwargs: + kwargs["errcode"] = Codes.FORBIDDEN + super(ThreepidValidationError, self).__init__(*args, **kwargs) + + class IncompatibleRoomVersionError(SynapseError): """A server is trying to join a room whose version it does not support. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 1045d28949..df524a23dd 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -176,6 +176,7 @@ class SynapseHomeServer(HomeServer): resources.update({ "/_matrix/client/api/v1": client_resource, + "/_synapse/password_reset": client_resource, "/_matrix/client/r0": client_resource, "/_matrix/client/unstable": client_resource, "/_matrix/client/v2_alpha": client_resource, diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 8400471f40..ae04252906 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -50,6 +50,11 @@ class EmailConfig(Config): else: self.email_app_name = "Matrix" + # TODO: Rename notif_from to something more generic, or have a separate + # from for password resets, message notifications, etc? + # Currently the email section is a bit bogged down with settings for + # multiple functions. Would be good to split it out into separate + # sections and only put the common ones under email: self.email_notif_from = email_config.get("notif_from", None) if self.email_notif_from is not None: # make sure it's valid @@ -74,7 +79,28 @@ class EmailConfig(Config): "account_validity", {}, ).get("renew_at") - if self.email_enable_notifs or account_validity_renewal_enabled: + email_trust_identity_server_for_password_resets = email_config.get( + "trust_identity_server_for_password_resets", False, + ) + self.email_password_reset_behaviour = ( + "remote" if email_trust_identity_server_for_password_resets else "local" + ) + if self.email_password_reset_behaviour == "local" and email_config == {}: + logger.warn( + "User password resets have been disabled due to lack of email config" + ) + self.email_password_reset_behaviour = "off" + + # Get lifetime of a validation token in milliseconds + self.email_validation_token_lifetime = self.parse_duration( + email_config.get("validation_token_lifetime", "1h") + ) + + if ( + self.email_enable_notifs + or account_validity_renewal_enabled + or self.email_password_reset_behaviour == "local" + ): # make sure we can import the required deps import jinja2 import bleach @@ -82,6 +108,67 @@ class EmailConfig(Config): jinja2 bleach + if self.email_password_reset_behaviour == "local": + required = [ + "smtp_host", + "smtp_port", + "notif_from", + ] + + missing = [] + for k in required: + if k not in email_config: + missing.append(k) + + if (len(missing) > 0): + raise RuntimeError( + "email.password_reset_behaviour is set to 'local' " + "but required keys are missing: %s" % + (", ".join(["email." + k for k in missing]),) + ) + + # Templates for password reset emails + self.email_password_reset_template_html = email_config.get( + "password_reset_template_html", "password_reset.html", + ) + self.email_password_reset_template_text = email_config.get( + "password_reset_template_text", "password_reset.txt", + ) + self.email_password_reset_failure_template = email_config.get( + "password_reset_failure_template", "password_reset_failure.html", + ) + # This template does not support any replaceable variables, so we will + # read it from the disk once during setup + email_password_reset_success_template = email_config.get( + "password_reset_success_template", "password_reset_success.html", + ) + + # Check templates exist + for f in [self.email_password_reset_template_html, + self.email_password_reset_template_text, + self.email_password_reset_failure_template, + email_password_reset_success_template]: + p = os.path.join(self.email_template_dir, f) + if not os.path.isfile(p): + raise ConfigError("Unable to find template file %s" % (p, )) + + # Retrieve content of web templates + filepath = os.path.join( + self.email_template_dir, + email_password_reset_success_template, + ) + self.email_password_reset_success_html_content = self.read_file( + filepath, + "email.password_reset_template_success_html", + ) + + if config.get("public_baseurl") is None: + raise RuntimeError( + "email.password_reset_behaviour is set to 'local' but no " + "public_baseurl is set. This is necessary to generate password " + "reset links" + ) + if self.email_enable_notifs: required = [ "smtp_host", @@ -121,10 +208,6 @@ class EmailConfig(Config): self.email_riot_base_url = email_config.get( "riot_base_url", None ) - else: - self.email_enable_notifs = False - # Not much point setting defaults for the rest: it would be an - # error for them to be used. if account_validity_renewal_enabled: self.email_expiry_template_html = email_config.get( @@ -141,10 +224,8 @@ class EmailConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ - # Enable sending emails for notification events or expiry notices - # Defining a custom URL for Riot is only needed if email notifications - # should contain links to a self-hosted installation of Riot; when set - # the "app_name" setting is ignored. + # Enable sending emails for password resets, notification events or + # account expiry notices # # If your SMTP server requires authentication, the optional smtp_user & # smtp_pass variables should be used @@ -152,20 +233,62 @@ class EmailConfig(Config): #email: # enable_notifs: false # smtp_host: "localhost" - # smtp_port: 25 + # smtp_port: 25 # SSL: 465, STARTTLS: 587 # smtp_user: "exampleusername" # smtp_pass: "examplepassword" # require_transport_security: False # notif_from: "Your Friendly %(app)s Home Server " # app_name: Matrix - # # if template_dir is unset, uses the example templates that are part of - # # the Synapse distribution. + # + # # Enable email notifications by default + # notif_for_new_users: True + # + # # Defining a custom URL for Riot is only needed if email notifications + # # should contain links to a self-hosted installation of Riot; when set + # # the "app_name" setting is ignored + # riot_base_url: "http://localhost/riot" + # + # # Enable sending password reset emails via the configured, trusted + # # identity servers + # # + # # IMPORTANT! This will give a malicious or overtaken identity server + # # the ability to reset passwords for your users! Make absolutely sure + # # that you want to do this! It is strongly recommended that password + # # reset emails be sent by the homeserver instead + # # + # # If this option is set to false and SMTP options have not been + # # configured, resetting user passwords via email will be disabled + # #trust_identity_server_for_password_resets: false + # + # # Configure the time that a validation email or text message code + # # will expire after sending + # # + # # This is currently used for password resets + # #validation_token_lifetime: 1h + # + # # Template directory. All template files should be stored within this + # # directory + # # # #template_dir: res/templates + # + # # Templates for email notifications + # # # notif_template_html: notif_mail.html # notif_template_text: notif_mail.txt - # # Templates for account expiry notices. + # + # # Templates for account expiry notices + # # # expiry_template_html: notice_expiry.html # expiry_template_text: notice_expiry.txt - # notif_for_new_users: True - # riot_base_url: "http://localhost/riot" + # + # # Templates for password reset emails sent by the homeserver + # # + # #password_reset_template_html: password_reset.html + # #password_reset_template_text: password_reset.txt + # + # # Templates for password reset success and failure pages that a user + # # will see after attempting to reset their password + # # + # #password_reset_template_success_html: password_reset_success.html + # #password_reset_template_failure_html: password_reset_failure.html """ diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index aa5d89a9ac..7f8ddc99c6 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -162,7 +162,7 @@ class AuthHandler(BaseHandler): defer.returnValue(params) @defer.inlineCallbacks - def check_auth(self, flows, clientdict, clientip): + def check_auth(self, flows, clientdict, clientip, password_servlet=False): """ Takes a dictionary sent by the client in the login / registration protocol and handles the User-Interactive Auth flow. @@ -186,6 +186,16 @@ class AuthHandler(BaseHandler): clientip (str): The IP address of the client. + password_servlet (bool): Whether the request originated from + PasswordRestServlet. + XXX: This is a temporary hack to distinguish between checking + for threepid validations locally (in the case of password + resets) and using the identity server (in the case of binding + a 3PID during registration). Once we start using the + homeserver for both tasks, this distinction will no longer be + necessary. + + Returns: defer.Deferred[dict, dict, str]: a deferred tuple of (creds, params, session_id). @@ -241,7 +251,9 @@ class AuthHandler(BaseHandler): if 'type' in authdict: login_type = authdict['type'] try: - result = yield self._check_auth_dict(authdict, clientip) + result = yield self._check_auth_dict( + authdict, clientip, password_servlet=password_servlet, + ) if result: creds[login_type] = result self._save_session(session) @@ -351,7 +363,7 @@ class AuthHandler(BaseHandler): return sess.setdefault('serverdict', {}).get(key, default) @defer.inlineCallbacks - def _check_auth_dict(self, authdict, clientip): + def _check_auth_dict(self, authdict, clientip, password_servlet=False): """Attempt to validate the auth dict provided by a client Args: @@ -369,7 +381,13 @@ class AuthHandler(BaseHandler): login_type = authdict['type'] checker = self.checkers.get(login_type) if checker is not None: - res = yield checker(authdict, clientip) + # XXX: Temporary workaround for having Synapse handle password resets + # See AuthHandler.check_auth for further details + res = yield checker( + authdict, + clientip=clientip, + password_servlet=password_servlet, + ) defer.returnValue(res) # build a v1-login-style dict out of the authdict and fall back to the @@ -383,7 +401,7 @@ class AuthHandler(BaseHandler): defer.returnValue(canonical_id) @defer.inlineCallbacks - def _check_recaptcha(self, authdict, clientip): + def _check_recaptcha(self, authdict, clientip, **kwargs): try: user_response = authdict["response"] except KeyError: @@ -429,20 +447,20 @@ class AuthHandler(BaseHandler): defer.returnValue(True) raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) - def _check_email_identity(self, authdict, _): - return self._check_threepid('email', authdict) + def _check_email_identity(self, authdict, **kwargs): + return self._check_threepid('email', authdict, **kwargs) - def _check_msisdn(self, authdict, _): + def _check_msisdn(self, authdict, **kwargs): return self._check_threepid('msisdn', authdict) - def _check_dummy_auth(self, authdict, _): + def _check_dummy_auth(self, authdict, **kwargs): return defer.succeed(True) - def _check_terms_auth(self, authdict, _): + def _check_terms_auth(self, authdict, **kwargs): return defer.succeed(True) @defer.inlineCallbacks - def _check_threepid(self, medium, authdict): + def _check_threepid(self, medium, authdict, password_servlet=False, **kwargs): if 'threepid_creds' not in authdict: raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) @@ -451,7 +469,29 @@ class AuthHandler(BaseHandler): identity_handler = self.hs.get_handlers().identity_handler logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) - threepid = yield identity_handler.threepid_from_creds(threepid_creds) + if ( + not password_servlet + or self.hs.config.email_password_reset_behaviour == "remote" + ): + threepid = yield identity_handler.threepid_from_creds(threepid_creds) + elif self.hs.config.email_password_reset_behaviour == "local": + row = yield self.store.get_threepid_validation_session( + medium, + threepid_creds["client_secret"], + sid=threepid_creds["sid"], + ) + + threepid = { + "medium": row["medium"], + "address": row["address"], + "validated_at": row["validated_at"], + } if row else None + + if row: + # Valid threepid returned, delete from the db + yield self.store.delete_threepid_session(threepid_creds["sid"]) + else: + raise SynapseError(400, "Password resets are not enabled on this homeserver") if not threepid: raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 22469486d7..04caf65793 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -247,7 +247,14 @@ class IdentityHandler(BaseHandler): defer.returnValue(changed) @defer.inlineCallbacks - def requestEmailToken(self, id_server, email, client_secret, send_attempt, **kwargs): + def requestEmailToken( + self, + id_server, + email, + client_secret, + send_attempt, + next_link=None, + ): if not self._should_trust_id_server(id_server): raise SynapseError( 400, "Untrusted ID server '%s'" % id_server, @@ -259,7 +266,9 @@ class IdentityHandler(BaseHandler): 'client_secret': client_secret, 'send_attempt': send_attempt, } - params.update(kwargs) + + if next_link: + params.update({'next_link': next_link}) try: data = yield self.http_client.post_json_get_json( diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index c269bcf4a4..4bc9eb7313 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -80,10 +80,10 @@ ALLOWED_ATTRS = { class Mailer(object): - def __init__(self, hs, app_name, notif_template_html, notif_template_text): + def __init__(self, hs, app_name, template_html, template_text): self.hs = hs - self.notif_template_html = notif_template_html - self.notif_template_text = notif_template_text + self.template_html = template_html + self.template_text = template_text self.sendmail = self.hs.get_sendmail() self.store = self.hs.get_datastore() @@ -94,21 +94,48 @@ class Mailer(object): logger.info("Created Mailer for app_name %s" % app_name) @defer.inlineCallbacks - def send_notification_mail(self, app_id, user_id, email_address, - push_actions, reason): - try: - from_string = self.hs.config.email_notif_from % { - "app": self.app_name - } - except TypeError: - from_string = self.hs.config.email_notif_from + def send_password_reset_mail( + self, + email_address, + token, + client_secret, + sid, + ): + """Send an email with a password reset link to a user + + Args: + email_address (str): Email address we're sending the password + reset to + token (str): Unique token generated by the server to verify + password reset email was received + client_secret (str): Unique token generated by the client to + group together multiple email sending attempts + sid (str): The generated session ID + """ + if email.utils.parseaddr(email_address)[1] == '': + raise RuntimeError("Invalid 'to' email address") + + link = ( + self.hs.config.public_baseurl + + "_synapse/password_reset/email/submit_token" + "?token=%s&client_secret=%s&sid=%s" % + (token, client_secret, sid) + ) - raw_from = email.utils.parseaddr(from_string)[1] - raw_to = email.utils.parseaddr(email_address)[1] + template_vars = { + "link": link, + } - if raw_to == '': - raise RuntimeError("Invalid 'to' address") + yield self.send_email( + email_address, + "[%s] Password Reset Email" % self.hs.config.server_name, + template_vars, + ) + @defer.inlineCallbacks + def send_notification_mail(self, app_id, user_id, email_address, + push_actions, reason): + """Send email regarding a user's room notifications""" rooms_in_order = deduped_ordered_list( [pa['room_id'] for pa in push_actions] ) @@ -176,14 +203,36 @@ class Mailer(object): "reason": reason, } - html_text = self.notif_template_html.render(**template_vars) + yield self.send_email( + email_address, + "[%s] %s" % (self.app_name, summary_text), + template_vars, + ) + + @defer.inlineCallbacks + def send_email(self, email_address, subject, template_vars): + """Send an email with the given information and template text""" + try: + from_string = self.hs.config.email_notif_from % { + "app": self.app_name + } + except TypeError: + from_string = self.hs.config.email_notif_from + + raw_from = email.utils.parseaddr(from_string)[1] + raw_to = email.utils.parseaddr(email_address)[1] + + if raw_to == '': + raise RuntimeError("Invalid 'to' address") + + html_text = self.template_html.render(**template_vars) html_part = MIMEText(html_text, "html", "utf8") - plain_text = self.notif_template_text.render(**template_vars) + plain_text = self.template_text.render(**template_vars) text_part = MIMEText(plain_text, "plain", "utf8") multipart_msg = MIMEMultipart('alternative') - multipart_msg['Subject'] = "[%s] %s" % (self.app_name, summary_text) + multipart_msg['Subject'] = subject multipart_msg['From'] = from_string multipart_msg['To'] = email_address multipart_msg['Date'] = email.utils.formatdate() diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index 14bc7823cf..aff85daeb5 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -70,8 +70,8 @@ class PusherFactory(object): mailer = Mailer( hs=self.hs, app_name=app_name, - notif_template_html=self.notif_template_html, - notif_template_text=self.notif_template_text, + template_html=self.notif_template_html, + template_text=self.notif_template_text, ) self.mailers[app_name] = mailer return EmailPusher(self.hs, pusherdict, mailer) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index f64baa4d58..c78f2cb15e 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -77,7 +77,7 @@ REQUIREMENTS = [ ] CONDITIONAL_REQUIREMENTS = { - "email.enable_notifs": ["Jinja2>=2.9", "bleach>=1.4.2"], + "email": ["Jinja2>=2.9", "bleach>=1.4.2"], "matrix-synapse-ldap3": ["matrix-synapse-ldap3>=0.1"], # we use execute_batch, which arrived in psycopg 2.7. diff --git a/synapse/res/templates/password_reset.html b/synapse/res/templates/password_reset.html new file mode 100644 index 0000000000..4fa7b36734 --- /dev/null +++ b/synapse/res/templates/password_reset.html @@ -0,0 +1,9 @@ + + +

A password reset request has been received for your Matrix account. If this was you, please click the link below to confirm resetting your password:

+ + {{ link }} + +

If this was not you, please disregard this email and contact your server administrator. Thank you.

+ + diff --git a/synapse/res/templates/password_reset.txt b/synapse/res/templates/password_reset.txt new file mode 100644 index 0000000000..f0deff59a7 --- /dev/null +++ b/synapse/res/templates/password_reset.txt @@ -0,0 +1,7 @@ +A password reset request has been received for your Matrix account. If this +was you, please click the link below to confirm resetting your password: + +{{ link }} + +If this was not you, please disregard this email and contact your server +administrator. Thank you. diff --git a/synapse/res/templates/password_reset_failure.html b/synapse/res/templates/password_reset_failure.html new file mode 100644 index 0000000000..0b132cf8db --- /dev/null +++ b/synapse/res/templates/password_reset_failure.html @@ -0,0 +1,6 @@ + + + +

{{ failure_reason }}. Your password has not been reset.

+ + diff --git a/synapse/res/templates/password_reset_success.html b/synapse/res/templates/password_reset_success.html new file mode 100644 index 0000000000..7b6fa5e6f0 --- /dev/null +++ b/synapse/res/templates/password_reset_success.html @@ -0,0 +1,6 @@ + + + +

Your password was successfully reset. You may now close this window.

+ + diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index ca35dc3c83..e4c63b69b9 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -15,19 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import re from six.moves import http_client +import jinja2 + from twisted.internet import defer from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, SynapseError, ThreepidValidationError +from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, assert_params_in_dict, parse_json_object_from_request, + parse_string, ) from synapse.util.msisdn import phone_number_to_msisdn +from synapse.util.stringutils import random_string from synapse.util.threepids import check_3pid_allowed from ._base import client_patterns, interactive_auth_handler @@ -41,17 +47,42 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): def __init__(self, hs): super(EmailPasswordRequestTokenRestServlet, self).__init__() self.hs = hs + self.datastore = hs.get_datastore() + self.config = hs.config self.identity_handler = hs.get_handlers().identity_handler + if self.config.email_password_reset_behaviour == "local": + from synapse.push.mailer import Mailer, load_jinja2_templates + templates = load_jinja2_templates( + config=hs.config, + template_html_name=hs.config.email_password_reset_template_html, + template_text_name=hs.config.email_password_reset_template_text, + ) + self.mailer = Mailer( + hs=self.hs, + app_name=self.config.email_app_name, + template_html=templates[0], + template_text=templates[1], + ) + @defer.inlineCallbacks def on_POST(self, request): + if self.config.email_password_reset_behaviour == "off": + raise SynapseError(400, "Password resets have been disabled on this server") + body = parse_json_object_from_request(request) assert_params_in_dict(body, [ - 'id_server', 'client_secret', 'email', 'send_attempt' + 'client_secret', 'email', 'send_attempt' ]) - if not check_3pid_allowed(self.hs, "email", body['email']): + # Extract params from body + client_secret = body["client_secret"] + email = body["email"] + send_attempt = body["send_attempt"] + next_link = body.get("next_link") # Optional param + + if not check_3pid_allowed(self.hs, "email", email): raise SynapseError( 403, "Your email domain is not authorized on this server", @@ -59,15 +90,100 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( - 'email', body['email'] + 'email', email, ) if existingUid is None: raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - ret = yield self.identity_handler.requestEmailToken(**body) + if self.config.email_password_reset_behaviour == "remote": + if 'id_server' not in body: + raise SynapseError(400, "Missing 'id_server' param in body") + + # Have the identity server handle the password reset flow + ret = yield self.identity_handler.requestEmailToken( + body["id_server"], email, client_secret, send_attempt, next_link, + ) + else: + # Send password reset emails from Synapse + sid = yield self.send_password_reset( + email, client_secret, send_attempt, next_link, + ) + + # Wrap the session id in a JSON object + ret = {"sid": sid} + defer.returnValue((200, ret)) + @defer.inlineCallbacks + def send_password_reset( + self, + email, + client_secret, + send_attempt, + next_link=None, + ): + """Send a password reset email + + Args: + email (str): The user's email address + client_secret (str): The provided client secret + send_attempt (int): Which send attempt this is + + Returns: + The new session_id upon success + + Raises: + SynapseError is an error occurred when sending the email + """ + # Check that this email/client_secret/send_attempt combo is new or + # greater than what we've seen previously + session = yield self.datastore.get_threepid_validation_session( + "email", client_secret, address=email, validated=False, + ) + + # Check to see if a session already exists and that it is not yet + # marked as validated + if session and session.get("validated_at") is None: + session_id = session['session_id'] + last_send_attempt = session['last_send_attempt'] + + # Check that the send_attempt is higher than previous attempts + if send_attempt <= last_send_attempt: + # If not, just return a success without sending an email + defer.returnValue(session_id) + else: + # An non-validated session does not exist yet. + # Generate a session id + session_id = random_string(16) + + # Generate a new validation token + token = random_string(32) + + # Send the mail with the link containing the token, client_secret + # and session_id + try: + yield self.mailer.send_password_reset_mail( + email, token, client_secret, session_id, + ) + except Exception: + logger.exception( + "Error sending a password reset email to %s", email, + ) + raise SynapseError( + 500, "An error was encountered when sending the password reset email" + ) + + token_expires = (self.hs.clock.time_msec() + + self.config.email_validation_token_lifetime) + + yield self.datastore.start_or_continue_validation_session( + "email", email, session_id, client_secret, send_attempt, + next_link, token, token_expires, + ) + + defer.returnValue(session_id) + class MsisdnPasswordRequestTokenRestServlet(RestServlet): PATTERNS = client_patterns("/account/password/msisdn/requestToken$") @@ -80,6 +196,9 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): + if not self.config.email_password_reset_behaviour == "off": + raise SynapseError(400, "Password resets have been disabled on this server") + body = parse_json_object_from_request(request) assert_params_in_dict(body, [ @@ -107,6 +226,118 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): defer.returnValue((200, ret)) +class PasswordResetSubmitTokenServlet(RestServlet): + """Handles 3PID validation token submission""" + PATTERNS = [ + re.compile("^/_synapse/password_reset/(?P[^/]*)/submit_token/*$"), + ] + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(PasswordResetSubmitTokenServlet, self).__init__() + self.hs = hs + self.auth = hs.get_auth() + self.config = hs.config + self.clock = hs.get_clock() + self.datastore = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, medium): + if medium != "email": + raise SynapseError( + 400, + "This medium is currently not supported for password resets", + ) + + sid = parse_string(request, "sid") + client_secret = parse_string(request, "client_secret") + token = parse_string(request, "token") + + # Attempt to validate a 3PID sesssion + try: + # Mark the session as valid + next_link = yield self.datastore.validate_threepid_session( + sid, + client_secret, + token, + self.clock.time_msec(), + ) + + # Perform a 302 redirect if next_link is set + if next_link: + if next_link.startswith("file:///"): + logger.warn( + "Not redirecting to next_link as it is a local file: address" + ) + else: + request.setResponseCode(302) + request.setHeader("Location", next_link) + finish_request(request) + defer.returnValue(None) + + # Otherwise show the success template + html = self.config.email_password_reset_success_html_content + request.setResponseCode(200) + except ThreepidValidationError as e: + # Show a failure page with a reason + html = self.load_jinja2_template( + self.config.email_template_dir, + self.config.email_password_reset_failure_template, + template_vars={ + "failure_reason": e.msg, + } + ) + request.setResponseCode(e.code) + + request.write(html.encode('utf-8')) + finish_request(request) + defer.returnValue(None) + + def load_jinja2_template(self, template_dir, template_filename, template_vars): + """Loads a jinja2 template with variables to insert + + Args: + template_dir (str): The directory where templates are stored + template_filename (str): The name of the template in the template_dir + template_vars (Dict): Dictionary of keys in the template + alongside their values to insert + + Returns: + str containing the contents of the rendered template + """ + loader = jinja2.FileSystemLoader(template_dir) + env = jinja2.Environment(loader=loader) + + template = env.get_template(template_filename) + return template.render(**template_vars) + + @defer.inlineCallbacks + def on_POST(self, request, medium): + if medium != "email": + raise SynapseError( + 400, + "This medium is currently not supported for password resets", + ) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, [ + 'sid', 'client_secret', 'token', + ]) + + valid, _ = yield self.datastore.validate_threepid_validation_token( + body['sid'], + body['client_secret'], + body['token'], + self.clock.time_msec(), + ) + response_code = 200 if valid else 400 + + defer.returnValue((response_code, {"success": valid})) + + class PasswordRestServlet(RestServlet): PATTERNS = client_patterns("/account/password$") @@ -144,6 +375,7 @@ class PasswordRestServlet(RestServlet): result, params, _ = yield self.auth_handler.check_auth( [[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]], body, self.hs.get_ip_from_request(request), + password_servlet=True, ) if LoginType.EMAIL_IDENTITY in result: @@ -417,6 +649,7 @@ class WhoamiRestServlet(RestServlet): def register_servlets(hs, http_server): EmailPasswordRequestTokenRestServlet(hs).register(http_server) MsisdnPasswordRequestTokenRestServlet(hs).register(http_server) + PasswordResetSubmitTokenServlet(hs).register(http_server) PasswordRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) EmailThreepidRequestTokenRestServlet(hs).register(http_server) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 52891bb9eb..ae891aa332 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -588,6 +588,10 @@ class SQLBaseStore(object): Args: table : string giving the table name values : dict of new column names and values for them + or_ignore : bool stating whether an exception should be raised + when a conflicting row already exists. If True, False will be + returned by the function instead + desc : string giving a description of the transaction Returns: bool: Whether the row was inserted or not. Only useful when @@ -1228,8 +1232,8 @@ class SQLBaseStore(object): ) txn.execute(select_sql, list(keyvalues.values())) - row = txn.fetchone() + if not row: if allow_none: return None diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index c1711bc8bd..23a4baa484 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 54 +SCHEMA_VERSION = 55 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 4cf159ba81..9b41cbd757 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -17,17 +17,20 @@ import re +from six import iterkeys from six.moves import range from twisted.internet import defer from synapse.api.constants import UserTypes -from synapse.api.errors import Codes, StoreError +from synapse.api.errors import Codes, StoreError, ThreepidValidationError from synapse.storage import background_updates from synapse.storage._base import SQLBaseStore from synapse.types import UserID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 + class RegistrationWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): @@ -422,7 +425,7 @@ class RegistrationWorkerStore(SQLBaseStore): defer.returnValue(None) @defer.inlineCallbacks - def get_user_id_by_threepid(self, medium, address): + def get_user_id_by_threepid(self, medium, address, require_verified=False): """Returns user id from threepid Args: @@ -595,6 +598,11 @@ class RegistrationStore( "user_threepids_grandfather", self._bg_user_threepids_grandfather, ) + # Create a background job for culling expired 3PID validity tokens + hs.get_clock().looping_call( + self.cull_expired_threepid_validation_tokens, THIRTY_MINUTES_IN_MS, + ) + @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id=None): """Adds an access token for the given user. @@ -963,7 +971,6 @@ class RegistrationStore( We do this by grandfathering in existing user threepids assuming that they used one of the server configured trusted identity servers. """ - id_servers = set(self.config.trusted_third_party_id_servers) def _bg_user_threepids_grandfather_txn(txn): @@ -984,3 +991,280 @@ class RegistrationStore( yield self._end_background_update("user_threepids_grandfather") defer.returnValue(1) + + def get_threepid_validation_session( + self, + medium, + client_secret, + address=None, + sid=None, + validated=None, + ): + """Gets a session_id and last_send_attempt (if available) for a + client_secret/medium/(address|session_id) combo + + Args: + medium (str|None): The medium of the 3PID + address (str|None): The address of the 3PID + sid (str|None): The ID of the validation session + client_secret (str|None): A unique string provided by the client to + help identify this validation attempt + validated (bool|None): Whether sessions should be filtered by + whether they have been validated already or not. None to + perform no filtering + + Returns: + deferred {str, int}|None: A dict containing the + latest session_id and send_attempt count for this 3PID. + Otherwise None if there hasn't been a previous attempt + """ + keyvalues = { + "medium": medium, + "client_secret": client_secret, + } + if address: + keyvalues["address"] = address + if sid: + keyvalues["session_id"] = sid + + assert(address or sid) + + def get_threepid_validation_session_txn(txn): + sql = """ + SELECT address, session_id, medium, client_secret, + last_send_attempt, validated_at + FROM threepid_validation_session WHERE %s + """ % (" AND ".join("%s = ?" % k for k in iterkeys(keyvalues)),) + + if validated is not None: + sql += " AND validated_at IS " + ("NOT NULL" if validated else "NULL") + + sql += " LIMIT 1" + + txn.execute(sql, list(keyvalues.values())) + rows = self.cursor_to_dict(txn) + if not rows: + return None + + return rows[0] + + return self.runInteraction( + "get_threepid_validation_session", + get_threepid_validation_session_txn, + ) + + def validate_threepid_session( + self, + session_id, + client_secret, + token, + current_ts, + ): + """Attempt to validate a threepid session using a token + + Args: + session_id (str): The id of a validation session + client_secret (str): A unique string provided by the client to + help identify this validation attempt + token (str): A validation token + current_ts (int): The current unix time in milliseconds. Used for + checking token expiry status + + Returns: + deferred str|None: A str representing a link to redirect the user + to if there is one. + """ + # Insert everything into a transaction in order to run atomically + def validate_threepid_session_txn(txn): + row = self._simple_select_one_txn( + txn, + table="threepid_validation_session", + keyvalues={"session_id": session_id}, + retcols=["client_secret", "validated_at"], + allow_none=True, + ) + + if not row: + raise ThreepidValidationError(400, "Unknown session_id") + retrieved_client_secret = row["client_secret"] + validated_at = row["validated_at"] + + if retrieved_client_secret != client_secret: + raise ThreepidValidationError( + 400, "This client_secret does not match the provided session_id", + ) + + row = self._simple_select_one_txn( + txn, + table="threepid_validation_token", + keyvalues={"session_id": session_id, "token": token}, + retcols=["expires", "next_link"], + allow_none=True, + ) + + if not row: + raise ThreepidValidationError( + 400, "Validation token not found or has expired", + ) + expires = row["expires"] + next_link = row["next_link"] + + # If the session is already validated, no need to revalidate + if validated_at: + return next_link + + if expires <= current_ts: + raise ThreepidValidationError( + 400, "This token has expired. Please request a new one", + ) + + # Looks good. Validate the session + self._simple_update_txn( + txn, + table="threepid_validation_session", + keyvalues={"session_id": session_id}, + updatevalues={"validated_at": self.clock.time_msec()}, + ) + + return next_link + + # Return next_link if it exists + return self.runInteraction( + "validate_threepid_session_txn", + validate_threepid_session_txn, + ) + + def upsert_threepid_validation_session( + self, + medium, + address, + client_secret, + send_attempt, + session_id, + validated_at=None, + ): + """Upsert a threepid validation session + Args: + medium (str): The medium of the 3PID + address (str): The address of the 3PID + client_secret (str): A unique string provided by the client to + help identify this validation attempt + send_attempt (int): The latest send_attempt on this session + session_id (str): The id of this validation session + validated_at (int|None): The unix timestamp in milliseconds of + when the session was marked as valid + """ + insertion_values = { + "medium": medium, + "address": address, + "client_secret": client_secret, + } + + if validated_at: + insertion_values["validated_at"] = validated_at + + return self._simple_upsert( + table="threepid_validation_session", + keyvalues={"session_id": session_id}, + values={"last_send_attempt": send_attempt}, + insertion_values=insertion_values, + desc="upsert_threepid_validation_session", + ) + + def start_or_continue_validation_session( + self, + medium, + address, + session_id, + client_secret, + send_attempt, + next_link, + token, + token_expires, + ): + """Creates a new threepid validation session if it does not already + exist and associates a new validation token with it + + Args: + medium (str): The medium of the 3PID + address (str): The address of the 3PID + session_id (str): The id of this validation session + client_secret (str): A unique string provided by the client to + help identify this validation attempt + send_attempt (int): The latest send_attempt on this session + next_link (str|None): The link to redirect the user to upon + successful validation + token (str): The validation token + token_expires (int): The timestamp for which after the token + will no longer be valid + """ + def start_or_continue_validation_session_txn(txn): + # Create or update a validation session + self._simple_upsert_txn( + txn, + table="threepid_validation_session", + keyvalues={"session_id": session_id}, + values={"last_send_attempt": send_attempt}, + insertion_values={ + "medium": medium, + "address": address, + "client_secret": client_secret, + }, + ) + + # Create a new validation token with this session ID + self._simple_insert_txn( + txn, + table="threepid_validation_token", + values={ + "session_id": session_id, + "token": token, + "next_link": next_link, + "expires": token_expires, + }, + ) + + return self.runInteraction( + "start_or_continue_validation_session", + start_or_continue_validation_session_txn, + ) + + def cull_expired_threepid_validation_tokens(self): + """Remove threepid validation tokens with expiry dates that have passed""" + def cull_expired_threepid_validation_tokens_txn(txn, ts): + sql = """ + DELETE FROM threepid_validation_token WHERE + expires < ? + """ + return txn.execute(sql, (ts,)) + + return self.runInteraction( + "cull_expired_threepid_validation_tokens", + cull_expired_threepid_validation_tokens_txn, + self.clock.time_msec(), + ) + + def delete_threepid_session(self, session_id): + """Removes a threepid validation session from the database. This can + be done after validation has been performed and whatever action was + waiting on it has been carried out + + Args: + session_id (str): The ID of the session to delete + """ + def delete_threepid_session_txn(txn): + self._simple_delete_txn( + txn, + table="threepid_validation_token", + keyvalues={"session_id": session_id}, + ) + self._simple_delete_txn( + txn, + table="threepid_validation_session", + keyvalues={"session_id": session_id}, + ) + + return self.runInteraction( + "delete_threepid_session", + delete_threepid_session_txn, + ) diff --git a/synapse/storage/schema/delta/55/track_threepid_validations.sql b/synapse/storage/schema/delta/55/track_threepid_validations.sql new file mode 100644 index 0000000000..a8eced2e0a --- /dev/null +++ b/synapse/storage/schema/delta/55/track_threepid_validations.sql @@ -0,0 +1,31 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +CREATE TABLE IF NOT EXISTS threepid_validation_session ( + session_id TEXT PRIMARY KEY, + medium TEXT NOT NULL, + address TEXT NOT NULL, + client_secret TEXT NOT NULL, + last_send_attempt BIGINT NOT NULL, + validated_at BIGINT +); + +CREATE TABLE IF NOT EXISTS threepid_validation_token ( + token TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + next_link TEXT, + expires BIGINT NOT NULL +); + +CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token(session_id); diff --git a/tests/utils.py b/tests/utils.py index 200c1ceabe..b2817cf22c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -131,7 +131,6 @@ def default_config(name, parse=False): "password_providers": [], "worker_replication_url": "", "worker_app": None, - "email_enable_notifs": False, "block_non_admin_invites": False, "federation_domain_whitelist": None, "filter_timeline_limit": 5000, -- cgit 1.5.1 From ed872db8df61f5ee019bdfdb68e1e83e9e2b7298 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 7 Jun 2019 02:53:47 +1000 Subject: fix maybe --- .../schema/full_schemas/54/stream_positions.sql | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/54/stream_positions.sql b/synapse/storage/schema/full_schemas/54/stream_positions.sql index 575ab6b354..c265fd20e2 100644 --- a/synapse/storage/schema/full_schemas/54/stream_positions.sql +++ b/synapse/storage/schema/full_schemas/54/stream_positions.sql @@ -2,24 +2,6 @@ INSERT INTO appservice_stream_position (stream_ordering) SELECT COALESCE(MAX(stream_ordering), 0) FROM events; INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', -1); INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events; -INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); -INSERT INTO stats_stream_pos (stream_id) VALUES (null); +INSERT INTO user_directory_stream_pos (stream_id) VALUES (0); +INSERT INTO stats_stream_pos (stream_id) VALUES (0); INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0); - ---- User dir population - --- Set up staging tables -INSERT INTO background_updates (update_name, progress_json) VALUES - ('populate_user_directory_createtables', '{}'); - --- Run through each room and update the user directory according to who is in it -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); - --- Insert all users, if search_all_users is on -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'); - --- Clean up staging tables -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); -- cgit 1.5.1 From 2d1d7b7e6f2bec3b96b0d23993369ce46aad4f32 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 6 Jun 2019 23:54:00 +0100 Subject: Prevent multiple device list updates from breaking a batch send (#5156) fixes #5153 --- changelog.d/5156.bugfix | 1 + synapse/federation/sender/per_destination_queue.py | 5 +- synapse/storage/devices.py | 152 +++++++++++++++++---- tests/storage/test_devices.py | 69 ++++++++++ 4 files changed, 196 insertions(+), 31 deletions(-) create mode 100644 changelog.d/5156.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5156.bugfix b/changelog.d/5156.bugfix new file mode 100644 index 0000000000..e8aa7d8241 --- /dev/null +++ b/changelog.d/5156.bugfix @@ -0,0 +1 @@ +Prevent federation device list updates breaking when processing multiple updates at once. \ No newline at end of file diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index fae8bea392..564c57203d 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -349,9 +349,10 @@ class PerDestinationQueue(object): @defer.inlineCallbacks def _get_new_device_messages(self, limit): last_device_list = self._last_device_list_stream_id - # Will return at most 20 entries + + # Retrieve list of new device updates to send to the destination now_stream_id, results = yield self._store.get_devices_by_remote( - self._destination, last_device_list + self._destination, last_device_list, limit=limit, ) edus = [ Edu( diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index fd869b934c..d102e07372 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from six import iteritems, itervalues +from six import iteritems from canonicaljson import json @@ -72,11 +72,14 @@ class DeviceWorkerStore(SQLBaseStore): defer.returnValue({d["device_id"]: d for d in devices}) - def get_devices_by_remote(self, destination, from_stream_id): + @defer.inlineCallbacks + def get_devices_by_remote(self, destination, from_stream_id, limit): """Get stream of updates to send to remote servers Returns: - (int, list[dict]): current stream id and list of updates + Deferred[tuple[int, list[dict]]]: + current stream id (ie, the stream id of the last update included in the + response), and the list of updates """ now_stream_id = self._device_list_id_gen.get_current_token() @@ -84,55 +87,131 @@ class DeviceWorkerStore(SQLBaseStore): destination, int(from_stream_id) ) if not has_changed: - return (now_stream_id, []) - - return self.runInteraction( + defer.returnValue((now_stream_id, [])) + + # We retrieve n+1 devices from the list of outbound pokes where n is + # our outbound device update limit. We then check if the very last + # device has the same stream_id as the second-to-last device. If so, + # then we ignore all devices with that stream_id and only send the + # devices with a lower stream_id. + # + # If when culling the list we end up with no devices afterwards, we + # consider the device update to be too large, and simply skip the + # stream_id; the rationale being that such a large device list update + # is likely an error. + updates = yield self.runInteraction( "get_devices_by_remote", self._get_devices_by_remote_txn, destination, from_stream_id, now_stream_id, + limit + 1, ) + # Return an empty list if there are no updates + if not updates: + defer.returnValue((now_stream_id, [])) + + # if we have exceeded the limit, we need to exclude any results with the + # same stream_id as the last row. + if len(updates) > limit: + stream_id_cutoff = updates[-1][2] + now_stream_id = stream_id_cutoff - 1 + else: + stream_id_cutoff = None + + # Perform the equivalent of a GROUP BY + # + # Iterate through the updates list and copy non-duplicate + # (user_id, device_id) entries into a map, with the value being + # the max stream_id across each set of duplicate entries + # + # maps (user_id, device_id) -> stream_id + # as long as their stream_id does not match that of the last row + query_map = {} + for update in updates: + if stream_id_cutoff is not None and update[2] >= stream_id_cutoff: + # Stop processing updates + break + + key = (update[0], update[1]) + query_map[key] = max(query_map.get(key, 0), update[2]) + + # If we didn't find any updates with a stream_id lower than the cutoff, it + # means that there are more than limit updates all of which have the same + # steam_id. + + # That should only happen if a client is spamming the server with new + # devices, in which case E2E isn't going to work well anyway. We'll just + # skip that stream_id and return an empty list, and continue with the next + # stream_id next time. + if not query_map: + defer.returnValue((stream_id_cutoff, [])) + + results = yield self._get_device_update_edus_by_remote( + destination, + from_stream_id, + query_map, + ) + + defer.returnValue((now_stream_id, results)) + def _get_devices_by_remote_txn( - self, txn, destination, from_stream_id, now_stream_id + self, txn, destination, from_stream_id, now_stream_id, limit ): + """Return device update information for a given remote destination + + Args: + txn (LoggingTransaction): The transaction to execute + destination (str): The host the device updates are intended for + from_stream_id (int): The minimum stream_id to filter updates by, exclusive + now_stream_id (int): The maximum stream_id to filter updates by, inclusive + limit (int): Maximum number of device updates to return + + Returns: + List: List of device updates + """ sql = """ - SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes + SELECT user_id, device_id, stream_id FROM device_lists_outbound_pokes WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? - GROUP BY user_id, device_id - LIMIT 20 + ORDER BY stream_id + LIMIT ? """ - txn.execute(sql, (destination, from_stream_id, now_stream_id, False)) + txn.execute(sql, (destination, from_stream_id, now_stream_id, False, limit)) - # maps (user_id, device_id) -> stream_id - query_map = {(r[0], r[1]): r[2] for r in txn} - if not query_map: - return (now_stream_id, []) + return list(txn) - if len(query_map) >= 20: - now_stream_id = max(stream_id for stream_id in itervalues(query_map)) + @defer.inlineCallbacks + def _get_device_update_edus_by_remote( + self, destination, from_stream_id, query_map, + ): + """Returns a list of device update EDUs as well as E2EE keys - devices = self._get_e2e_device_keys_txn( - txn, + Args: + destination (str): The host the device updates are intended for + from_stream_id (int): The minimum stream_id to filter updates by, exclusive + query_map (Dict[(str, str): int]): Dictionary mapping + user_id/device_id to update stream_id + + Returns: + List[Dict]: List of objects representing an device update EDU + + """ + devices = yield self.runInteraction( + "_get_e2e_device_keys_txn", + self._get_e2e_device_keys_txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True, ) - prev_sent_id_sql = """ - SELECT coalesce(max(stream_id), 0) as stream_id - FROM device_lists_outbound_last_success - WHERE destination = ? AND user_id = ? AND stream_id <= ? - """ - results = [] for user_id, user_devices in iteritems(devices): # The prev_id for the first row is always the last row before # `from_stream_id` - txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) - rows = txn.fetchall() - prev_id = rows[0][0] + prev_id = yield self._get_last_device_update_for_remote_user( + destination, user_id, from_stream_id, + ) for device_id, device in iteritems(user_devices): stream_id = query_map[(user_id, device_id)] result = { @@ -156,7 +235,22 @@ class DeviceWorkerStore(SQLBaseStore): results.append(result) - return (now_stream_id, results) + defer.returnValue(results) + + def _get_last_device_update_for_remote_user( + self, destination, user_id, from_stream_id, + ): + def f(txn): + prev_sent_id_sql = """ + SELECT coalesce(max(stream_id), 0) as stream_id + FROM device_lists_outbound_last_success + WHERE destination = ? AND user_id = ? AND stream_id <= ? + """ + txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) + rows = txn.fetchall() + return rows[0][0] + + return self.runInteraction("get_last_device_update_for_remote_user", f) def mark_as_sent_devices_by_remote(self, destination, stream_id): """Mark that updates have successfully been sent to the destination. diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index aef4dfaf57..6396ccddb5 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -71,6 +71,75 @@ class DeviceStoreTestCase(tests.unittest.TestCase): res["device2"], ) + @defer.inlineCallbacks + def test_get_devices_by_remote(self): + device_ids = ["device_id1", "device_id2"] + + # Add two device updates with a single stream_id + yield self.store.add_device_change_to_streams( + "user_id", device_ids, ["somehost"], + ) + + # Get all device updates ever meant for this remote + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "somehost", -1, limit=100, + ) + + # Check original device_ids are contained within these updates + self._check_devices_in_updates(device_ids, device_updates) + + @defer.inlineCallbacks + def test_get_devices_by_remote_limited(self): + # Test breaking the update limit in 1, 101, and 1 device_id segments + + # first add one device + device_ids1 = ["device_id0"] + yield self.store.add_device_change_to_streams( + "user_id", device_ids1, ["someotherhost"], + ) + + # then add 101 + device_ids2 = ["device_id" + str(i + 1) for i in range(101)] + yield self.store.add_device_change_to_streams( + "user_id", device_ids2, ["someotherhost"], + ) + + # then one more + device_ids3 = ["newdevice"] + yield self.store.add_device_change_to_streams( + "user_id", device_ids3, ["someotherhost"], + ) + + # + # now read them back. + # + + # first we should get a single update + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "someotherhost", -1, limit=100, + ) + self._check_devices_in_updates(device_ids1, device_updates) + + # Then we should get an empty list back as the 101 devices broke the limit + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "someotherhost", now_stream_id, limit=100, + ) + self.assertEqual(len(device_updates), 0) + + # The 101 devices should've been cleared, so we should now just get one device + # update + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "someotherhost", now_stream_id, limit=100, + ) + self._check_devices_in_updates(device_ids3, device_updates) + + def _check_devices_in_updates(self, expected_device_ids, device_updates): + """Check that an specific device ids exist in a list of device update EDUs""" + self.assertEqual(len(device_updates), len(expected_device_ids)) + + received_device_ids = {update["device_id"] for update in device_updates} + self.assertEqual(received_device_ids, set(expected_device_ids)) + @defer.inlineCallbacks def test_update_device(self): yield self.store.store_device("user_id", "device_id", "display_name 1") -- cgit 1.5.1 From 43badd2cd4315c3f3ed45b0092c4479a43a3eb52 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Jun 2019 14:31:05 +0100 Subject: Fix key verification when key stored with null valid_until_ms Some keys are stored in the synapse database with a null valid_until_ms which caused an exception to be thrown when using that key. We fix this by treating nulls as zeroes, i.e. they keys will match verification requests with a minimum_valid_until_ms of zero (i.e. don't validate ts) but will not match requests with a non-zero minimum_valid_until_ms. Fixes #5391. --- synapse/storage/keys.py | 8 +++++++ tests/crypto/test_keyring.py | 50 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 5300720dbb..e3655ad8d7 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -80,6 +80,14 @@ class KeyStore(SQLBaseStore): for row in txn: server_name, key_id, key_bytes, ts_valid_until_ms = row + + if ts_valid_until_ms is None: + # Old keys may be stored with a ts_valid_until_ms of null, + # in which case we treat this as if it was set to `0`, i.e. + # it won't match key requests that define a minimum + # `ts_valid_until_ms`. + ts_valid_until_ms = 0 + res = FetchKeyResult( verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)), valid_until_ts=ts_valid_until_ms, diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 4b1901ce31..5a355f00cc 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -25,7 +25,11 @@ from twisted.internet import defer from synapse.api.errors import SynapseError from synapse.crypto import keyring -from synapse.crypto.keyring import PerspectivesKeyFetcher, ServerKeyFetcher +from synapse.crypto.keyring import ( + PerspectivesKeyFetcher, + ServerKeyFetcher, + StoreKeyFetcher, +) from synapse.storage.keys import FetchKeyResult from synapse.util import logcontext from synapse.util.logcontext import LoggingContext @@ -219,6 +223,50 @@ class KeyringTestCase(unittest.HomeserverTestCase): # self.assertFalse(d.called) self.get_success(d) + def test_verify_json_for_server_with_null_valid_until_ms(self): + """Tests that we correctly handle key requests for keys we've stored + with a null `ts_valid_until_ms` + """ + mock_fetcher = keyring.KeyFetcher() + mock_fetcher.get_keys = Mock(return_value=defer.succeed({})) + + kr = keyring.Keyring( + self.hs, key_fetchers=(StoreKeyFetcher(self.hs), mock_fetcher) + ) + + key1 = signedjson.key.generate_signing_key(1) + r = self.hs.datastore.store_server_verify_keys( + "server9", + time.time() * 1000, + [("server9", get_key_id(key1), FetchKeyResult(get_verify_key(key1), None))], + ) + self.get_success(r) + + json1 = {} + signedjson.sign.sign_json(json1, "server9", key1) + + # should fail immediately on an unsigned object + d = _verify_json_for_server(kr, "server9", {}, 0, "test unsigned") + self.failureResultOf(d, SynapseError) + + # should fail on a signed object with a non-zero minimum_valid_until_ms, + # as it tries to refetch the keys and fails. + d = _verify_json_for_server( + kr, "server9", json1, 500, "test signed non-zero min" + ) + self.get_failure(d, SynapseError) + + # We expect the keyring tried to refetch the key once. + mock_fetcher.get_keys.assert_called_once_with( + {"server9": {get_key_id(key1): 500}} + ) + + # should succeed on a signed object with a 0 minimum_valid_until_ms + d = _verify_json_for_server( + kr, "server9", json1, 0, "test signed with zero min" + ) + self.get_success(d) + def test_verify_json_dedupes_key_requests(self): """Two requests for the same key should be deduped.""" key1 = signedjson.key.generate_signing_key(1) -- cgit 1.5.1 From c413540fb9e4b6ee2ec975a98676ea56d12249c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Jun 2019 16:21:42 +0100 Subject: Fix bug sending federation transactions with lots of EDUs If we try and send a transaction with lots of EDUs and we run out of space, we call get_new_device_msgs_for_remote with a limit of 0, which then failed. --- synapse/storage/deviceinbox.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index 9b0a99cb49..4ea0deea4f 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -138,6 +138,10 @@ class DeviceInboxWorkerStore(SQLBaseStore): if not has_changed or last_stream_id == current_stream_id: return defer.succeed(([], current_stream_id)) + if limit <= 0: + # This can happen if we run out of room for EDUs in the transaction. + return defer.succeed(([], last_stream_id)) + def get_new_messages_for_remote_destination_txn(txn): sql = ( "SELECT stream_id, messages_json FROM device_federation_outbox" -- cgit 1.5.1 From 49e01e5710fdbd9bb8da24844718eb2f5d6ee5c7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Jun 2019 23:09:31 +0100 Subject: Fix defaults on checking threepids --- synapse/handlers/auth.py | 1 + synapse/storage/registration.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 7f8ddc99c6..a0cf37a9f9 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -479,6 +479,7 @@ class AuthHandler(BaseHandler): medium, threepid_creds["client_secret"], sid=threepid_creds["sid"], + validated=True, ) threepid = { diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9b41cbd757..1dd1182e82 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -998,7 +998,7 @@ class RegistrationStore( client_secret, address=None, sid=None, - validated=None, + validated=True, ): """Gets a session_id and last_send_attempt (if available) for a client_secret/medium/(address|session_id) combo -- cgit 1.5.1 From 94dac0f3e55938916ec76a2183d6703af6ea4362 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 10 Jun 2019 23:33:59 +0100 Subject: add monthly active users to phonehome stats (#5252) * add monthly active users to phonehome stats --- changelog.d/5252.feature | 1 + synapse/app/homeserver.py | 1 + synapse/storage/__init__.py | 44 +++++++++++++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 changelog.d/5252.feature (limited to 'synapse/storage') diff --git a/changelog.d/5252.feature b/changelog.d/5252.feature new file mode 100644 index 0000000000..44115b0382 --- /dev/null +++ b/changelog.d/5252.feature @@ -0,0 +1 @@ +Add monthly active users to phonehome stats. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index df524a23dd..811b547dd3 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -541,6 +541,7 @@ def run(hs): stats["total_room_count"] = room_count stats["daily_active_users"] = yield hs.get_datastore().count_daily_users() + stats["monthly_active_users"] = yield hs.get_datastore().count_monthly_users() stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 71316f7d09..0ca6f6121f 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -279,23 +279,37 @@ class DataStore( """ Counts the number of users who used this homeserver in the last 24 hours. """ + yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24) + return self.runInteraction("count_daily_users", self._count_users, yesterday,) - def _count_users(txn): - yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24) - - sql = """ - SELECT COALESCE(count(*), 0) FROM ( - SELECT user_id FROM user_ips - WHERE last_seen > ? - GROUP BY user_id - ) u - """ - - txn.execute(sql, (yesterday,)) - count, = txn.fetchone() - return count + def count_monthly_users(self): + """ + Counts the number of users who used this homeserver in the last 30 days. + Note this method is intended for phonehome metrics only and is different + from the mau figure in synapse.storage.monthly_active_users which, + amongst other things, includes a 3 day grace period before a user counts. + """ + thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) + return self.runInteraction( + "count_monthly_users", + self._count_users, + thirty_days_ago, + ) - return self.runInteraction("count_users", _count_users) + def _count_users(self, txn, time_from): + """ + Returns number of users seen in the past time_from period + """ + sql = """ + SELECT COALESCE(count(*), 0) FROM ( + SELECT user_id FROM user_ips + WHERE last_seen > ? + GROUP BY user_id + ) u + """ + txn.execute(sql, (time_from,)) + count, = txn.fetchone() + return count def count_r30_users(self): """ -- cgit 1.5.1 From 6312d6cc7c5bc80984758a70e2c368d8b4fb3bfd Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 13 Jun 2019 22:40:52 +1000 Subject: Expose statistics on extrems to prometheus (#5384) --- changelog.d/5384.feature | 1 + scripts/generate_signing_key.py | 2 +- synapse/metrics/__init__.py | 112 +++++++++++++++++++++++------ synapse/storage/events.py | 44 ++++++++---- tests/storage/test_cleanup_extrems.py | 128 +++++++++++++--------------------- tests/storage/test_event_metrics.py | 97 ++++++++++++++++++++++++++ tests/unittest.py | 61 +++++++++++++++- 7 files changed, 331 insertions(+), 114 deletions(-) create mode 100644 changelog.d/5384.feature create mode 100644 tests/storage/test_event_metrics.py (limited to 'synapse/storage') diff --git a/changelog.d/5384.feature b/changelog.d/5384.feature new file mode 100644 index 0000000000..9497f521c8 --- /dev/null +++ b/changelog.d/5384.feature @@ -0,0 +1 @@ +Statistics on forward extremities per room are now exposed via Prometheus. diff --git a/scripts/generate_signing_key.py b/scripts/generate_signing_key.py index ba3ba97395..36e9140b50 100755 --- a/scripts/generate_signing_key.py +++ b/scripts/generate_signing_key.py @@ -16,7 +16,7 @@ import argparse import sys -from signedjson.key import write_signing_keys, generate_signing_key +from signedjson.key import generate_signing_key, write_signing_keys from synapse.util.stringutils import random_string diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index ef48984fdd..539c353528 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -25,7 +25,7 @@ import six import attr from prometheus_client import Counter, Gauge, Histogram -from prometheus_client.core import REGISTRY, GaugeMetricFamily +from prometheus_client.core import REGISTRY, GaugeMetricFamily, HistogramMetricFamily from twisted.internet import reactor @@ -40,7 +40,6 @@ HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") class RegistryProxy(object): - @staticmethod def collect(): for metric in REGISTRY.collect(): @@ -63,10 +62,7 @@ class LaterGauge(object): try: calls = self.caller() except Exception: - logger.exception( - "Exception running callback for LaterGauge(%s)", - self.name, - ) + logger.exception("Exception running callback for LaterGauge(%s)", self.name) yield g return @@ -116,9 +112,7 @@ class InFlightGauge(object): # Create a class which have the sub_metrics values as attributes, which # default to 0 on initialization. Used to pass to registered callbacks. self._metrics_class = attr.make_class( - "_MetricsEntry", - attrs={x: attr.ib(0) for x in sub_metrics}, - slots=True, + "_MetricsEntry", attrs={x: attr.ib(0) for x in sub_metrics}, slots=True ) # Counts number of in flight blocks for a given set of label values @@ -157,7 +151,9 @@ class InFlightGauge(object): Note: may be called by a separate thread. """ - in_flight = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels) + in_flight = GaugeMetricFamily( + self.name + "_total", self.desc, labels=self.labels + ) metrics_by_key = {} @@ -179,7 +175,9 @@ class InFlightGauge(object): yield in_flight for name in self.sub_metrics: - gauge = GaugeMetricFamily("_".join([self.name, name]), "", labels=self.labels) + gauge = GaugeMetricFamily( + "_".join([self.name, name]), "", labels=self.labels + ) for key, metrics in six.iteritems(metrics_by_key): gauge.add_metric(key, getattr(metrics, name)) yield gauge @@ -193,12 +191,75 @@ class InFlightGauge(object): all_gauges[self.name] = self +@attr.s(hash=True) +class BucketCollector(object): + """ + Like a Histogram, but allows buckets to be point-in-time instead of + incrementally added to. + + Args: + name (str): Base name of metric to be exported to Prometheus. + data_collector (callable -> dict): A synchronous callable that + returns a dict mapping bucket to number of items in the + bucket. If these buckets are not the same as the buckets + given to this class, they will be remapped into them. + buckets (list[float]): List of floats/ints of the buckets to + give to Prometheus. +Inf is ignored, if given. + + """ + + name = attr.ib() + data_collector = attr.ib() + buckets = attr.ib() + + def collect(self): + + # Fetch the data -- this must be synchronous! + data = self.data_collector() + + buckets = {} + + res = [] + for x in data.keys(): + for i, bound in enumerate(self.buckets): + if x <= bound: + buckets[bound] = buckets.get(bound, 0) + data[x] + break + + for i in self.buckets: + res.append([i, buckets.get(i, 0)]) + + res.append(["+Inf", sum(data.values())]) + + metric = HistogramMetricFamily( + self.name, + "", + buckets=res, + sum_value=sum([x * y for x, y in data.items()]), + ) + yield metric + + def __attrs_post_init__(self): + self.buckets = [float(x) for x in self.buckets if x != "+Inf"] + if self.buckets != sorted(self.buckets): + raise ValueError("Buckets not sorted") + + self.buckets = tuple(self.buckets) + + if self.name in all_gauges.keys(): + logger.warning("%s already registered, reregistering" % (self.name,)) + REGISTRY.unregister(all_gauges.pop(self.name)) + + REGISTRY.register(self) + all_gauges[self.name] = self + + # # Detailed CPU metrics # -class CPUMetrics(object): +class CPUMetrics(object): def __init__(self): ticks_per_sec = 100 try: @@ -237,13 +298,28 @@ gc_time = Histogram( "python_gc_time", "Time taken to GC (sec)", ["gen"], - buckets=[0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.50, 1.00, 2.50, - 5.00, 7.50, 15.00, 30.00, 45.00, 60.00], + buckets=[ + 0.0025, + 0.005, + 0.01, + 0.025, + 0.05, + 0.10, + 0.25, + 0.50, + 1.00, + 2.50, + 5.00, + 7.50, + 15.00, + 30.00, + 45.00, + 60.00, + ], ) class GCCounts(object): - def collect(self): cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) for n, m in enumerate(gc.get_count()): @@ -279,9 +355,7 @@ sent_transactions_counter = Counter("synapse_federation_client_sent_transactions events_processed_counter = Counter("synapse_federation_client_events_processed", "") event_processing_loop_counter = Counter( - "synapse_event_processing_loop_count", - "Event processing loop iterations", - ["name"], + "synapse_event_processing_loop_count", "Event processing loop iterations", ["name"] ) event_processing_loop_room_count = Counter( @@ -311,7 +385,6 @@ last_ticked = time.time() class ReactorLastSeenMetric(object): - def collect(self): cm = GaugeMetricFamily( "python_twisted_reactor_last_seen", @@ -325,7 +398,6 @@ REGISTRY.register(ReactorLastSeenMetric()) def runUntilCurrentTimer(func): - @functools.wraps(func) def f(*args, **kwargs): now = reactor.seconds() diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f9162be9b9..1578403f79 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -17,7 +17,7 @@ import itertools import logging -from collections import OrderedDict, deque, namedtuple +from collections import Counter as c_counter, OrderedDict, deque, namedtuple from functools import wraps from six import iteritems, text_type @@ -33,6 +33,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.metrics import BucketCollector from synapse.metrics.background_process_metrics import run_as_background_process from synapse.state import StateResolutionStore from synapse.storage.background_updates import BackgroundUpdateStore @@ -220,13 +221,38 @@ class EventsStore( EventsWorkerStore, BackgroundUpdateStore, ): - def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() + # Collect metrics on the number of forward extremities that exist. + self._current_forward_extremities_amount = {} + + BucketCollector( + "synapse_forward_extremities", + lambda: self._current_forward_extremities_amount, + buckets=[1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"] + ) + + # Read the extrems every 60 minutes + hs.get_clock().looping_call(self._read_forward_extremities, 60 * 60 * 1000) + + @defer.inlineCallbacks + def _read_forward_extremities(self): + def fetch(txn): + txn.execute( + """ + select count(*) c from event_forward_extremities + group by room_id + """ + ) + return txn.fetchall() + + res = yield self.runInteraction("read_forward_extremities", fetch) + self._current_forward_extremities_amount = c_counter(list(x[0] for x in res)) + @defer.inlineCallbacks def persist_events(self, events_and_contexts, backfilled=False): """ @@ -568,17 +594,11 @@ class EventsStore( ) txn.execute(sql, batch) - results.extend( - r[0] - for r in txn - if not json.loads(r[1]).get("soft_failed") - ) + results.extend(r[0] for r in txn if not json.loads(r[1]).get("soft_failed")) for chunk in batch_iter(event_ids, 100): yield self.runInteraction( - "_get_events_which_are_prevs", - _get_events_which_are_prevs_txn, - chunk, + "_get_events_which_are_prevs", _get_events_which_are_prevs_txn, chunk ) defer.returnValue(results) @@ -640,9 +660,7 @@ class EventsStore( for chunk in batch_iter(event_ids, 100): yield self.runInteraction( - "_get_prevs_before_rejected", - _get_prevs_before_rejected_txn, - chunk, + "_get_prevs_before_rejected", _get_prevs_before_rejected_txn, chunk ) defer.returnValue(existing_prevs) diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py index 6aa8b8b3c6..f4c81ef77d 100644 --- a/tests/storage/test_cleanup_extrems.py +++ b/tests/storage/test_cleanup_extrems.py @@ -15,7 +15,6 @@ import os.path -from synapse.api.constants import EventTypes from synapse.storage import prepare_database from synapse.types import Requester, UserID @@ -23,17 +22,12 @@ from tests.unittest import HomeserverTestCase class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): - """Test the background update to clean forward extremities table. """ - def make_homeserver(self, reactor, clock): - # Hack until we understand why test_forked_graph_cleanup fails with v4 - config = self.default_config() - config['default_room_version'] = '1' - return self.setup_test_homeserver(config=config) + Test the background update to clean forward extremities table. + """ def prepare(self, reactor, clock, homeserver): self.store = homeserver.get_datastore() - self.event_creator = homeserver.get_event_creation_handler() self.room_creator = homeserver.get_room_creation_handler() # Create a test user and room @@ -42,56 +36,6 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): info = self.get_success(self.room_creator.create_room(self.requester, {})) self.room_id = info["room_id"] - def create_and_send_event(self, soft_failed=False, prev_event_ids=None): - """Create and send an event. - - Args: - soft_failed (bool): Whether to create a soft failed event or not - prev_event_ids (list[str]|None): Explicitly set the prev events, - or if None just use the default - - Returns: - str: The new event's ID. - """ - prev_events_and_hashes = None - if prev_event_ids: - prev_events_and_hashes = [[p, {}, 0] for p in prev_event_ids] - - event, context = self.get_success( - self.event_creator.create_event( - self.requester, - { - "type": EventTypes.Message, - "room_id": self.room_id, - "sender": self.user.to_string(), - "content": {"body": "", "msgtype": "m.text"}, - }, - prev_events_and_hashes=prev_events_and_hashes, - ) - ) - - if soft_failed: - event.internal_metadata.soft_failed = True - - self.get_success( - self.event_creator.send_nonmember_event(self.requester, event, context) - ) - - return event.event_id - - def add_extremity(self, event_id): - """Add the given event as an extremity to the room. - """ - self.get_success( - self.store._simple_insert( - table="event_forward_extremities", - values={"room_id": self.room_id, "event_id": event_id}, - desc="test_add_extremity", - ) - ) - - self.store.get_latest_event_ids_in_room.invalidate((self.room_id,)) - def run_background_update(self): """Re run the background update to clean up the extremities. """ @@ -131,10 +75,16 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): """ # Create the room graph - event_id_1 = self.create_and_send_event() - event_id_2 = self.create_and_send_event(True, [event_id_1]) - event_id_3 = self.create_and_send_event(True, [event_id_2]) - event_id_4 = self.create_and_send_event(False, [event_id_3]) + event_id_1 = self.create_and_send_event(self.room_id, self.user) + event_id_2 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_1] + ) + event_id_3 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_2] + ) + event_id_4 = self.create_and_send_event( + self.room_id, self.user, False, [event_id_3] + ) # Check the latest events are as expected latest_event_ids = self.get_success( @@ -154,12 +104,16 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): Where SF* are soft failed, and with extremities of A and B """ # Create the room graph - event_id_a = self.create_and_send_event() - event_id_sf1 = self.create_and_send_event(True, [event_id_a]) - event_id_b = self.create_and_send_event(False, [event_id_sf1]) + event_id_a = self.create_and_send_event(self.room_id, self.user) + event_id_sf1 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_a] + ) + event_id_b = self.create_and_send_event( + self.room_id, self.user, False, [event_id_sf1] + ) # Add the new extremity and check the latest events are as expected - self.add_extremity(event_id_a) + self.add_extremity(self.room_id, event_id_a) latest_event_ids = self.get_success( self.store.get_latest_event_ids_in_room(self.room_id) @@ -185,13 +139,19 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): Where SF* are soft failed, and with extremities of A and B """ # Create the room graph - event_id_a = self.create_and_send_event() - event_id_sf1 = self.create_and_send_event(True, [event_id_a]) - event_id_sf2 = self.create_and_send_event(True, [event_id_sf1]) - event_id_b = self.create_and_send_event(False, [event_id_sf2]) + event_id_a = self.create_and_send_event(self.room_id, self.user) + event_id_sf1 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_a] + ) + event_id_sf2 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_sf1] + ) + event_id_b = self.create_and_send_event( + self.room_id, self.user, False, [event_id_sf2] + ) # Add the new extremity and check the latest events are as expected - self.add_extremity(event_id_a) + self.add_extremity(self.room_id, event_id_a) latest_event_ids = self.get_success( self.store.get_latest_event_ids_in_room(self.room_id) @@ -227,16 +187,26 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): """ # Create the room graph - event_id_a = self.create_and_send_event() - event_id_b = self.create_and_send_event() - event_id_sf1 = self.create_and_send_event(True, [event_id_a]) - event_id_sf2 = self.create_and_send_event(True, [event_id_a, event_id_b]) - event_id_sf3 = self.create_and_send_event(True, [event_id_sf1]) - self.create_and_send_event(True, [event_id_sf2, event_id_sf3]) # SF4 - event_id_c = self.create_and_send_event(False, [event_id_sf3]) + event_id_a = self.create_and_send_event(self.room_id, self.user) + event_id_b = self.create_and_send_event(self.room_id, self.user) + event_id_sf1 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_a] + ) + event_id_sf2 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_a, event_id_b] + ) + event_id_sf3 = self.create_and_send_event( + self.room_id, self.user, True, [event_id_sf1] + ) + self.create_and_send_event( + self.room_id, self.user, True, [event_id_sf2, event_id_sf3] + ) # SF4 + event_id_c = self.create_and_send_event( + self.room_id, self.user, False, [event_id_sf3] + ) # Add the new extremity and check the latest events are as expected - self.add_extremity(event_id_a) + self.add_extremity(self.room_id, event_id_a) latest_event_ids = self.get_success( self.store.get_latest_event_ids_in_room(self.room_id) diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py new file mode 100644 index 0000000000..20a068f1fc --- /dev/null +++ b/tests/storage/test_event_metrics.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.metrics import REGISTRY +from synapse.types import Requester, UserID + +from tests.unittest import HomeserverTestCase + + +class ExtremStatisticsTestCase(HomeserverTestCase): + def test_exposed_to_prometheus(self): + """ + Forward extremity counts are exposed via Prometheus. + """ + room_creator = self.hs.get_room_creation_handler() + + user = UserID("alice", "test") + requester = Requester(user, None, False, None, None) + + # Real events, forward extremities + events = [(3, 2), (6, 2), (4, 6)] + + for event_count, extrems in events: + info = self.get_success(room_creator.create_room(requester, {})) + room_id = info["room_id"] + + last_event = None + + # Make a real event chain + for i in range(event_count): + ev = self.create_and_send_event(room_id, user, False, last_event) + last_event = [ev] + + # Sprinkle in some extremities + for i in range(extrems): + ev = self.create_and_send_event(room_id, user, False, last_event) + + # Let it run for a while, then pull out the statistics from the + # Prometheus client registry + self.reactor.advance(60 * 60 * 1000) + self.pump(1) + + items = list( + filter( + lambda x: x.name == "synapse_forward_extremities", + list(REGISTRY.collect()), + ) + ) + + # Check the values are what we want + buckets = {} + _count = 0 + _sum = 0 + + for i in items[0].samples: + if i[0].endswith("_bucket"): + buckets[i[1]['le']] = i[2] + elif i[0].endswith("_count"): + _count = i[2] + elif i[0].endswith("_sum"): + _sum = i[2] + + # 3 buckets, 2 with 2 extrems, 1 with 6 extrems (bucketed as 7), and + # +Inf which is all + self.assertEqual( + buckets, + { + 1.0: 0, + 2.0: 2, + 3.0: 0, + 5.0: 0, + 7.0: 1, + 10.0: 0, + 15.0: 0, + 20.0: 0, + 50.0: 0, + 100.0: 0, + 200.0: 0, + 500.0: 0, + "+Inf": 3, + }, + ) + # 3 rooms, with 10 total events + self.assertEqual(_count, 3) + self.assertEqual(_sum, 10) diff --git a/tests/unittest.py b/tests/unittest.py index 7dbb64af59..b6dc7932ce 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -27,11 +27,12 @@ import twisted.logger from twisted.internet.defer import Deferred from twisted.trial import unittest +from synapse.api.constants import EventTypes from synapse.config.homeserver import HomeServerConfig from synapse.http.server import JsonResource from synapse.http.site import SynapseRequest from synapse.server import HomeServer -from synapse.types import UserID, create_requester +from synapse.types import Requester, UserID, create_requester from synapse.util.logcontext import LoggingContext from tests.server import get_clock, make_request, render, setup_test_homeserver @@ -442,6 +443,64 @@ class HomeserverTestCase(TestCase): access_token = channel.json_body["access_token"] return access_token + def create_and_send_event( + self, room_id, user, soft_failed=False, prev_event_ids=None + ): + """ + Create and send an event. + + Args: + soft_failed (bool): Whether to create a soft failed event or not + prev_event_ids (list[str]|None): Explicitly set the prev events, + or if None just use the default + + Returns: + str: The new event's ID. + """ + event_creator = self.hs.get_event_creation_handler() + secrets = self.hs.get_secrets() + requester = Requester(user, None, False, None, None) + + prev_events_and_hashes = None + if prev_event_ids: + prev_events_and_hashes = [[p, {}, 0] for p in prev_event_ids] + + event, context = self.get_success( + event_creator.create_event( + requester, + { + "type": EventTypes.Message, + "room_id": room_id, + "sender": user.to_string(), + "content": {"body": secrets.token_hex(), "msgtype": "m.text"}, + }, + prev_events_and_hashes=prev_events_and_hashes, + ) + ) + + if soft_failed: + event.internal_metadata.soft_failed = True + + self.get_success( + event_creator.send_nonmember_event(requester, event, context) + ) + + return event.event_id + + def add_extremity(self, room_id, event_id): + """ + Add the given event as an extremity to the room. + """ + self.get_success( + self.hs.get_datastore()._simple_insert( + table="event_forward_extremities", + values={"room_id": room_id, "event_id": event_id}, + desc="test_add_extremity", + ) + ) + + self.hs.get_datastore().get_latest_event_ids_in_room.invalidate((room_id,)) + def attempt_wrong_password_login(self, username, password): """Attempts to login as the user with the given password, asserting that the attempt *fails*. -- cgit 1.5.1 From d0530382eeff053547304532167c0e4654af172c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 14 Jun 2019 13:18:24 +0100 Subject: Track deactivated accounts in the database (#5378) --- changelog.d/5378.misc | 1 + synapse/handlers/deactivate_account.py | 4 + synapse/storage/registration.py | 114 +++++++++++++++++++++ .../schema/delta/55/users_alter_deactivated.sql | 19 ++++ tests/rest/client/v2_alpha/test_account.py | 45 ++++++++ 5 files changed, 183 insertions(+) create mode 100644 changelog.d/5378.misc create mode 100644 synapse/storage/schema/delta/55/users_alter_deactivated.sql (limited to 'synapse/storage') diff --git a/changelog.d/5378.misc b/changelog.d/5378.misc new file mode 100644 index 0000000000..365e49d634 --- /dev/null +++ b/changelog.d/5378.misc @@ -0,0 +1 @@ +Track deactivated accounts in the database. diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 6a91f7698e..b29089d82c 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2017, 2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -114,6 +115,9 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() + # Mark the user as deactivated. + yield self.store.set_user_deactivated_status(user_id, True) + defer.returnValue(identity_server_supports_unbinding) def _start_user_parting(self): diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 1dd1182e82..4c5751b57f 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import re from six import iterkeys @@ -31,6 +32,8 @@ from synapse.util.caches.descriptors import cached, cachedInlineCallbacks THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 +logger = logging.getLogger(__name__) + class RegistrationWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): @@ -598,11 +601,75 @@ class RegistrationStore( "user_threepids_grandfather", self._bg_user_threepids_grandfather, ) + self.register_background_update_handler( + "users_set_deactivated_flag", self._backgroud_update_set_deactivated_flag, + ) + # Create a background job for culling expired 3PID validity tokens hs.get_clock().looping_call( self.cull_expired_threepid_validation_tokens, THIRTY_MINUTES_IN_MS, ) + @defer.inlineCallbacks + def _backgroud_update_set_deactivated_flag(self, progress, batch_size): + """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1 + for each of them. + """ + + last_user = progress.get("user_id", "") + + def _backgroud_update_set_deactivated_flag_txn(txn): + txn.execute( + """ + SELECT + users.name, + COUNT(access_tokens.token) AS count_tokens, + COUNT(user_threepids.address) AS count_threepids + FROM users + LEFT JOIN access_tokens ON (access_tokens.user_id = users.name) + LEFT JOIN user_threepids ON (user_threepids.user_id = users.name) + WHERE password_hash IS NULL OR password_hash = '' + AND users.name > ? + GROUP BY users.name + ORDER BY users.name ASC + LIMIT ?; + """, + (last_user, batch_size), + ) + + rows = self.cursor_to_dict(txn) + + if not rows: + return True + + rows_processed_nb = 0 + + for user in rows: + if not user["count_tokens"] and not user["count_threepids"]: + self.set_user_deactivated_status_txn(txn, user["user_id"], True) + rows_processed_nb += 1 + + logger.info("Marked %d rows as deactivated", rows_processed_nb) + + self._background_update_progress_txn( + txn, "users_set_deactivated_flag", {"user_id": rows[-1]["user_id"]} + ) + + if batch_size > len(rows): + return True + else: + return False + + end = yield self.runInteraction( + "users_set_deactivated_flag", + _backgroud_update_set_deactivated_flag_txn, + ) + + if end: + yield self._end_background_update("users_set_deactivated_flag") + + defer.returnValue(batch_size) + @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id=None): """Adds an access token for the given user. @@ -1268,3 +1335,50 @@ class RegistrationStore( "delete_threepid_session", delete_threepid_session_txn, ) + + def set_user_deactivated_status_txn(self, txn, user_id, deactivated): + self._simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,), + ) + + @defer.inlineCallbacks + def set_user_deactivated_status(self, user_id, deactivated): + """Set the `deactivated` property for the provided user to the provided value. + + Args: + user_id (str): The ID of the user to set the status for. + deactivated (bool): The value to set for `deactivated`. + """ + + yield self.runInteraction( + "set_user_deactivated_status", + self.set_user_deactivated_status_txn, + user_id, deactivated, + ) + + @cachedInlineCallbacks() + def get_user_deactivated_status(self, user_id): + """Retrieve the value for the `deactivated` property for the provided user. + + Args: + user_id (str): The ID of the user to retrieve the status for. + + Returns: + defer.Deferred(bool): The requested value. + """ + + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user_id}, + retcol="deactivated", + desc="get_user_deactivated_status", + ) + + # Convert the integer into a boolean. + defer.returnValue(res == 1) diff --git a/synapse/storage/schema/delta/55/users_alter_deactivated.sql b/synapse/storage/schema/delta/55/users_alter_deactivated.sql new file mode 100644 index 0000000000..dabdde489b --- /dev/null +++ b/synapse/storage/schema/delta/55/users_alter_deactivated.sql @@ -0,0 +1,19 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE users ADD deactivated SMALLINT DEFAULT 0 NOT NULL; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('users_set_deactivated_flag', '{}'); diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 0d1c0868ce..a60a4a3b87 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os import re from email.parser import Parser @@ -239,3 +240,47 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): ) self.render(request) self.assertEquals(expected_code, channel.code, channel.result) + + +class DeactivateTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + account.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver() + return hs + + def test_deactivate_account(self): + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + + request_data = json.dumps({ + "auth": { + "type": "m.login.password", + "user": user_id, + "password": "test", + }, + "erase": False, + }) + request, channel = self.make_request( + "POST", + "account/deactivate", + request_data, + access_token=tok, + ) + self.render(request) + self.assertEqual(request.code, 200) + + store = self.hs.get_datastore() + + # Check that the user has been marked as deactivated. + self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id))) + + # Check that this access token has been invalidated. + request, channel = self.make_request("GET", "account/whoami") + self.render(request) + self.assertEqual(request.code, 401) -- cgit 1.5.1 From 3ed595e327aee6d45ed0371c98e828d724c26b2d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Jun 2019 14:07:32 +0100 Subject: Prometheus histograms are cumalative --- synapse/metrics/__init__.py | 1 - synapse/storage/events.py | 3 ++- tests/storage/test_event_metrics.py | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 0d3ae1a43d..8aee14a8a8 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -224,7 +224,6 @@ class BucketCollector(object): for i, bound in enumerate(self.buckets): if x <= bound: buckets[bound] = buckets.get(bound, 0) + data[x] - break for i in self.buckets: res.append([str(i), buckets.get(i, 0)]) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1578403f79..f631fb1733 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -228,7 +228,8 @@ class EventsStore( self._state_resolution_handler = hs.get_state_resolution_handler() # Collect metrics on the number of forward extremities that exist. - self._current_forward_extremities_amount = {} + # Counter of number of extremities to count + self._current_forward_extremities_amount = c_counter() BucketCollector( "synapse_forward_extremities", diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py index 1655fcdafc..19f9ccf5e0 100644 --- a/tests/storage/test_event_metrics.py +++ b/tests/storage/test_event_metrics.py @@ -64,16 +64,16 @@ class ExtremStatisticsTestCase(HomeserverTestCase): expected = set([ b'synapse_forward_extremities_bucket{le="1.0"} 0.0', b'synapse_forward_extremities_bucket{le="2.0"} 2.0', - b'synapse_forward_extremities_bucket{le="3.0"} 0.0', - b'synapse_forward_extremities_bucket{le="5.0"} 0.0', - b'synapse_forward_extremities_bucket{le="7.0"} 1.0', - b'synapse_forward_extremities_bucket{le="10.0"} 0.0', - b'synapse_forward_extremities_bucket{le="15.0"} 0.0', - b'synapse_forward_extremities_bucket{le="20.0"} 0.0', - b'synapse_forward_extremities_bucket{le="50.0"} 0.0', - b'synapse_forward_extremities_bucket{le="100.0"} 0.0', - b'synapse_forward_extremities_bucket{le="200.0"} 0.0', - b'synapse_forward_extremities_bucket{le="500.0"} 0.0', + b'synapse_forward_extremities_bucket{le="3.0"} 2.0', + b'synapse_forward_extremities_bucket{le="5.0"} 2.0', + b'synapse_forward_extremities_bucket{le="7.0"} 3.0', + b'synapse_forward_extremities_bucket{le="10.0"} 3.0', + b'synapse_forward_extremities_bucket{le="15.0"} 3.0', + b'synapse_forward_extremities_bucket{le="20.0"} 3.0', + b'synapse_forward_extremities_bucket{le="50.0"} 3.0', + b'synapse_forward_extremities_bucket{le="100.0"} 3.0', + b'synapse_forward_extremities_bucket{le="200.0"} 3.0', + b'synapse_forward_extremities_bucket{le="500.0"} 3.0', b'synapse_forward_extremities_bucket{le="+Inf"} 3.0', b'synapse_forward_extremities_count 3.0', b'synapse_forward_extremities_sum 10.0', -- cgit 1.5.1 From 6d56a694f4cbfaf9c57a56837d4170e6c6783f3c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 7 Jun 2019 15:30:54 +0100 Subject: Don't send renewal emails to deactivated users --- changelog.d/5394.bugfix | 1 + synapse/handlers/account_validity.py | 3 ++ synapse/handlers/deactivate_account.py | 6 +++ synapse/storage/_base.py | 4 +- synapse/storage/registration.py | 14 ++++++ tests/rest/client/v2_alpha/test_register.py | 67 ++++++++++++++++++----------- 6 files changed, 68 insertions(+), 27 deletions(-) create mode 100644 changelog.d/5394.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/5394.bugfix b/changelog.d/5394.bugfix new file mode 100644 index 0000000000..2ad9fbe82c --- /dev/null +++ b/changelog.d/5394.bugfix @@ -0,0 +1 @@ +Fix a bug where deactivated users could receive renewal emails if the account validity feature is on. diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index 261446517d..5e0b92eb1c 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -110,6 +110,9 @@ class AccountValidityHandler(object): # Stop right here if the user doesn't have at least one email address. # In this case, they will have to ask their server admin to renew their # account manually. + # We don't need to do a specific check to make sure the account isn't + # deactivated, as a deactivated account isn't supposed to have any + # email address attached to it. if not addresses: return diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index b29089d82c..7378b56c1d 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -43,6 +43,8 @@ class DeactivateAccountHandler(BaseHandler): # it left off (if it has work left to do). hs.get_reactor().callWhenRunning(self._start_user_parting) + self._account_validity_enabled = hs.config.account_validity.enabled + @defer.inlineCallbacks def deactivate_account(self, user_id, erase_data, id_server=None): """Deactivate a user's account @@ -115,6 +117,10 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() + # Remove all information on the user from the account_validity table. + if self._account_validity_enabled: + yield self.store.delete_account_validity_for_user(user_id) + # Mark the user as deactivated. yield self.store.set_user_deactivated_status(user_id, True) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index ae891aa332..941c07fce5 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -299,12 +299,12 @@ class SQLBaseStore(object): def select_users_with_no_expiration_date_txn(txn): """Retrieves the list of registered users with no expiration date from the - database. + database, filtering out deactivated users. """ sql = ( "SELECT users.name FROM users" " LEFT JOIN account_validity ON (users.name = account_validity.user_id)" - " WHERE account_validity.user_id is NULL;" + " WHERE account_validity.user_id is NULL AND users.deactivated = 0;" ) txn.execute(sql, []) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 4c5751b57f..9f910eac9c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -251,6 +251,20 @@ class RegistrationWorkerStore(SQLBaseStore): desc="set_renewal_mail_status", ) + @defer.inlineCallbacks + def delete_account_validity_for_user(self, user_id): + """Deletes the entry for the given user in the account validity table, removing + their expiration date and renewal token. + + Args: + user_id (str): ID of the user to remove from the account validity table. + """ + yield self._simple_delete_one( + table="account_validity", + keyvalues={"user_id": user_id}, + desc="delete_account_validity_for_user", + ) + @defer.inlineCallbacks def is_server_admin(self, user): res = yield self._simple_select_one_onecol( diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 8536e6777a..b35b215446 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -26,7 +26,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import Codes from synapse.appservice import ApplicationService from synapse.rest.client.v1 import login -from synapse.rest.client.v2_alpha import account_validity, register, sync +from synapse.rest.client.v2_alpha import account, account_validity, register, sync from tests import unittest @@ -308,6 +308,7 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): login.register_servlets, sync.register_servlets, account_validity.register_servlets, + account.register_servlets, ] def make_homeserver(self, reactor, clock): @@ -358,20 +359,7 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): def test_renewal_email(self): self.email_attempts = [] - user_id = self.register_user("kermit", "monkey") - tok = self.login("kermit", "monkey") - # We need to manually add an email address otherwise the handler will do - # nothing. - now = self.hs.clock.time_msec() - self.get_success( - self.store.user_add_threepid( - user_id=user_id, - medium="email", - address="kermit@example.com", - validated_at=now, - added_at=now, - ) - ) + (user_id, tok) = self.create_user() # Move 6 days forward. This should trigger a renewal email to be sent. self.reactor.advance(datetime.timedelta(days=6).total_seconds()) @@ -396,6 +384,44 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): def test_manual_email_send(self): self.email_attempts = [] + (user_id, tok) = self.create_user() + request, channel = self.make_request( + b"POST", + "/_matrix/client/unstable/account_validity/send_mail", + access_token=tok, + ) + self.render(request) + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.assertEqual(len(self.email_attempts), 1) + + def test_deactivated_user(self): + self.email_attempts = [] + + (user_id, tok) = self.create_user() + + request_data = json.dumps({ + "auth": { + "type": "m.login.password", + "user": user_id, + "password": "monkey", + }, + "erase": False, + }) + request, channel = self.make_request( + "POST", + "account/deactivate", + request_data, + access_token=tok, + ) + self.render(request) + self.assertEqual(request.code, 200) + + self.reactor.advance(datetime.timedelta(days=8).total_seconds()) + + self.assertEqual(len(self.email_attempts), 0) + + def create_user(self): user_id = self.register_user("kermit", "monkey") tok = self.login("kermit", "monkey") # We need to manually add an email address otherwise the handler will do @@ -410,16 +436,7 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): added_at=now, ) ) - - request, channel = self.make_request( - b"POST", - "/_matrix/client/unstable/account_validity/send_mail", - access_token=tok, - ) - self.render(request) - self.assertEquals(channel.result["code"], b"200", channel.result) - - self.assertEqual(len(self.email_attempts), 1) + return (user_id, tok) def test_manual_email_send_expired_account(self): user_id = self.register_user("kermit", "monkey") -- cgit 1.5.1 From e0b77b004db33563ec0a08fe835406dbc1591b6b Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 14 Jun 2019 16:00:45 +0100 Subject: Fix background job for deactivated flag --- synapse/storage/registration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9f910eac9c..d36917e4d6 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -642,7 +642,9 @@ class RegistrationStore( FROM users LEFT JOIN access_tokens ON (access_tokens.user_id = users.name) LEFT JOIN user_threepids ON (user_threepids.user_id = users.name) - WHERE password_hash IS NULL OR password_hash = '' + WHERE (users.password_hash IS NULL OR users.password_hash = '') + AND (users.appservice_id IS NULL OR users.appservice_id = '') + AND users.is_guest = 0 AND users.name > ? GROUP BY users.name ORDER BY users.name ASC @@ -666,7 +668,7 @@ class RegistrationStore( logger.info("Marked %d rows as deactivated", rows_processed_nb) self._background_update_progress_txn( - txn, "users_set_deactivated_flag", {"user_id": rows[-1]["user_id"]} + txn, "users_set_deactivated_flag", {"user_id": rows[-1]["name"]} ) if batch_size > len(rows): -- cgit 1.5.1 From eba7caf09fe9bb5f5a0d4b17c5dde1413343cadc Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 18 Jun 2019 00:59:00 +1000 Subject: Remove Postgres 9.4 support (#5448) --- .buildkite/docker-compose.py35.pg94.yaml | 21 --------------------- .buildkite/pipeline.yml | 17 ----------------- CONTRIBUTING.rst | 19 +++++++++---------- UPGRADE.rst | 31 +++++++++++++++++++++++++++++-- changelog.d/5448.removal | 1 + docker/Dockerfile-pgtests | 4 ++-- docker/run_pg_tests.sh | 2 +- docs/postgres.rst | 4 ++-- synapse/storage/engines/postgres.py | 8 ++++++-- synapse/storage/search.py | 22 ---------------------- 10 files changed, 50 insertions(+), 79 deletions(-) delete mode 100644 .buildkite/docker-compose.py35.pg94.yaml create mode 100644 changelog.d/5448.removal (limited to 'synapse/storage') diff --git a/.buildkite/docker-compose.py35.pg94.yaml b/.buildkite/docker-compose.py35.pg94.yaml deleted file mode 100644 index 978aedd115..0000000000 --- a/.buildkite/docker-compose.py35.pg94.yaml +++ /dev/null @@ -1,21 +0,0 @@ -version: '3.1' - -services: - - postgres: - image: postgres:9.4 - environment: - POSTGRES_PASSWORD: postgres - - testenv: - image: python:3.5 - depends_on: - - postgres - env_file: .env - environment: - SYNAPSE_POSTGRES_HOST: postgres - SYNAPSE_POSTGRES_USER: postgres - SYNAPSE_POSTGRES_PASSWORD: postgres - working_dir: /app - volumes: - - ..:/app diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6c6229a205..20c7aab5a7 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -116,23 +116,6 @@ steps: - exit_status: 2 limit: 2 - - label: ":python: 3.5 / :postgres: 9.4" - env: - TRIAL_FLAGS: "-j 4" - command: - - "bash -c 'python -m pip install tox && python -m tox -e py35-postgres,codecov'" - plugins: - - docker-compose#v2.1.0: - run: testenv - config: - - .buildkite/docker-compose.py35.pg94.yaml - retry: - automatic: - - exit_status: -1 - limit: 2 - - exit_status: 2 - limit: 2 - - label: ":python: 3.5 / :postgres: 9.5" env: TRIAL_FLAGS: "-j 4" diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 9a283ced6e..2c44422a0e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -30,21 +30,20 @@ use github's pull request workflow to review the contribution, and either ask you to make any refinements needed or merge it and make them ourselves. The changes will then land on master when we next do a release. -We use `CircleCI `_ and `Travis CI -`_ for continuous integration. All -pull requests to synapse get automatically tested by Travis and CircleCI. -If your change breaks the build, this will be shown in GitHub, so please -keep an eye on the pull request for feedback. +We use `CircleCI `_ and `Buildkite +`_ for continuous integration. +Buildkite builds need to be authorised by a maintainer. If your change breaks +the build, this will be shown in GitHub, so please keep an eye on the pull +request for feedback. To run unit tests in a local development environment, you can use: -- ``tox -e py27`` (requires tox to be installed by ``pip install tox``) for - SQLite-backed Synapse on Python 2.7. -- ``tox -e py35`` for SQLite-backed Synapse on Python 3.5. +- ``tox -e py35`` (requires tox to be installed by ``pip install tox``) + for SQLite-backed Synapse on Python 3.5. - ``tox -e py36`` for SQLite-backed Synapse on Python 3.6. -- ``tox -e py27-postgres`` for PostgreSQL-backed Synapse on Python 2.7 +- ``tox -e py36-postgres`` for PostgreSQL-backed Synapse on Python 3.6 (requires a running local PostgreSQL with access to create databases). -- ``./test_postgresql.sh`` for PostgreSQL-backed Synapse on Python 2.7 +- ``./test_postgresql.sh`` for PostgreSQL-backed Synapse on Python 3.5 (requires Docker). Entirely self-contained, recommended if you don't want to set up PostgreSQL yourself. diff --git a/UPGRADE.rst b/UPGRADE.rst index 6032a505c9..1fb109a218 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -49,6 +49,33 @@ returned by the Client-Server API: # configured on port 443. curl -kv https:///_matrix/client/versions 2>&1 | grep "Server:" +Upgrading to v1.1 +================= + +Synapse 1.1 removes support for older Python and PostgreSQL versions, as +outlined in `our deprecation notice `_. + +Minimum Python Version +---------------------- + +Synapse v1.1 has a minimum Python requirement of Python 3.5. Python 3.6 or +Python 3.7 are recommended as they have improved internal string handling, +significantly reducing memory usage. + +If you use current versions of the Matrix.org-distributed Debian packages or +Docker images, action is not required. + +If you install Synapse in a Python virtual environment, please see "Upgrading to +v0.34.0" for notes on setting up a new virtualenv under Python 3. + +Minimum PostgreSQL Version +-------------------------- + +If using PostgreSQL under Synapse, you will need to use PostgreSQL 9.5 or above. +Please see the +`PostgreSQL documentation `_ +for more details on upgrading your database. + Upgrading to v1.0 ================= @@ -71,11 +98,11 @@ server in a closed federation. This can be done in one of two ways:- * Configure a whitelist of server domains to trust via ``federation_certificate_verification_whitelist``. See the `sample configuration file `_ -for more details on these settings. +for more details on these settings. Email ----- -When a user requests a password reset, Synapse will send an email to the +When a user requests a password reset, Synapse will send an email to the user to confirm the request. Previous versions of Synapse delegated the job of sending this email to an diff --git a/changelog.d/5448.removal b/changelog.d/5448.removal new file mode 100644 index 0000000000..33b9859dae --- /dev/null +++ b/changelog.d/5448.removal @@ -0,0 +1 @@ +PostgreSQL 9.4 is no longer supported. Synapse requires Postgres 9.5+ or above for Postgres support. diff --git a/docker/Dockerfile-pgtests b/docker/Dockerfile-pgtests index 7da8eeb9eb..3bfee845c6 100644 --- a/docker/Dockerfile-pgtests +++ b/docker/Dockerfile-pgtests @@ -3,10 +3,10 @@ FROM matrixdotorg/sytest:latest # The Sytest image doesn't come with python, so install that -RUN apt-get -qq install -y python python-dev python-pip +RUN apt-get update && apt-get -qq install -y python3 python3-dev python3-pip # We need tox to run the tests in run_pg_tests.sh -RUN pip install tox +RUN python3 -m pip install tox ADD run_pg_tests.sh /pg_tests.sh ENTRYPOINT /pg_tests.sh diff --git a/docker/run_pg_tests.sh b/docker/run_pg_tests.sh index e77424c41a..d18d1e4c8e 100755 --- a/docker/run_pg_tests.sh +++ b/docker/run_pg_tests.sh @@ -17,4 +17,4 @@ su -c '/usr/lib/postgresql/9.6/bin/pg_ctl -w -D /var/lib/postgresql/data start' # Run the tests cd /src export TRIAL_FLAGS="-j 4" -tox --workdir=/tmp -e py27-postgres +tox --workdir=/tmp -e py35-postgres diff --git a/docs/postgres.rst b/docs/postgres.rst index e81e10403f..33f58e3ace 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -1,7 +1,7 @@ Using Postgres -------------- -Postgres version 9.4 or later is known to work. +Postgres version 9.5 or later is known to work. Install postgres client libraries ================================= @@ -16,7 +16,7 @@ a postgres database. * For other pre-built packages, please consult the documentation from the relevant package. -* If you installed synapse `in a virtualenv +* If you installed synapse `in a virtualenv <../INSTALL.md#installing-from-source>`_, you can install the library with:: ~/synapse/env/bin/pip install matrix-synapse[postgres] diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 1b97ee74e3..289b6bc281 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -45,6 +45,10 @@ class PostgresEngine(object): # together. For example, version 8.1.5 will be returned as 80105 self._version = db_conn.server_version + # Are we on a supported PostgreSQL version? + if self._version < 90500: + raise RuntimeError("Synapse requires PostgreSQL 9.5+ or above.") + db_conn.set_isolation_level( self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ ) @@ -64,9 +68,9 @@ class PostgresEngine(object): @property def can_native_upsert(self): """ - Can we use native UPSERTs? This requires PostgreSQL 9.5+. + Can we use native UPSERTs? """ - return self._version >= 90500 + return True def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): diff --git a/synapse/storage/search.py b/synapse/storage/search.py index ff49eaae02..10a27c207a 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -341,29 +341,7 @@ class SearchStore(BackgroundUpdateStore): for entry in entries ) - # inserts to a GIN index are normally batched up into a pending - # list, and then all committed together once the list gets to a - # certain size. The trouble with that is that postgres (pre-9.5) - # uses work_mem to determine the length of the list, and work_mem - # is typically very large. - # - # We therefore reduce work_mem while we do the insert. - # - # (postgres 9.5 uses the separate gin_pending_list_limit setting, - # so doesn't suffer the same problem, but changing work_mem will - # be harmless) - # - # Note that we don't need to worry about restoring it on - # exception, because exceptions will cause the transaction to be - # rolled back, including the effects of the SET command. - # - # Also: we use SET rather than SET LOCAL because there's lots of - # other stuff going on in this transaction, which want to have the - # normal work_mem setting. - - txn.execute("SET work_mem='256kB'") txn.executemany(sql, args) - txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( -- cgit 1.5.1