From a4bf72c30c5953b721a64eae89db186fa8735bb3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 29 Aug 2019 17:38:51 +0100 Subject: Censor redactions in DB after a month --- synapse/storage/events.py | 88 +++++++++++++++++++++- .../storage/schema/delta/56/redaction_censor.sql | 17 +++++ 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/56/redaction_censor.sql (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5a95c36a8b..2970da6829 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -23,7 +23,7 @@ from functools import wraps from six import iteritems, text_type from six.moves import range -from canonicaljson import json +from canonicaljson import encode_canonical_json, json from prometheus_client import Counter, Histogram from twisted.internet import defer @@ -33,6 +33,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.events.utils import prune_event_dict from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable from synapse.logging.utils import log_function from synapse.metrics import BucketCollector @@ -262,6 +263,13 @@ class EventsStore( hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000) + def _censor_redactions(): + return run_as_background_process( + "_censor_redactions", self._censor_redactions + ) + + hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) + @defer.inlineCallbacks def _read_forward_extremities(self): def fetch(txn): @@ -1548,6 +1556,84 @@ class EventsStore( (event.event_id, event.redacts), ) + @defer.inlineCallbacks + def _censor_redactions(self): + """Censors all redactions older than a month that haven't been censored. + + By censor we mean update the event_json table with the redacted event. + + Returns: + Deferred + """ + + if self.stream_ordering_month_ago is None: + return + + max_pos = self.stream_ordering_month_ago + + # We fetch all redactions that point to an event that we have that has + # a stream ordering from over a month ago, that we haven't yet censored + # in the DB. + sql = """ + SELECT er.event_id, redacts FROM redactions + INNER JOIN events AS er USING (event_id) + INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id) + WHERE NOT have_censored + AND ? <= er.stream_ordering AND er.stream_ordering <= ? + ORDER BY er.stream_ordering ASC + LIMIT ? + """ + + rows = yield self._execute( + "_censor_redactions_fetch", None, sql, -max_pos, max_pos, 100 + ) + + updates = [] + + for redaction_id, event_id in rows: + redaction_event = yield self.get_event(redaction_id, allow_none=True) + original_event = yield self.get_event( + event_id, allow_rejected=True, allow_none=True + ) + + # The SQL above ensures that we have both the redaction and + # original event, so if the `get_event` calls return None it + # means that the redaction wasn't allowed. Either way we know that + # the result won't change so we mark the fact that we've checked. 
+ if ( + redaction_event + and original_event + and original_event.internal_metadata.is_redacted() + ): + # Redaction was allowed + pruned_json = encode_canonical_json( + prune_event_dict(original_event.get_dict()) + ) + else: + # Redaction wasn't allowed + pruned_json = None + + updates.append((redaction_id, event_id, pruned_json)) + + def _update_censor_txn(txn): + for redaction_id, event_id, pruned_json in updates: + if pruned_json: + self._simple_update_one_txn( + txn, + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": pruned_json}, + ) + + self._simple_update_one_txn( + txn, + table="redactions", + keyvalues={"event_id": redaction_id}, + updatevalues={"have_censored": True}, + ) + + yield self.runInteraction("_update_censor_txn", _update_censor_txn) + @defer.inlineCallbacks def count_daily_messages(self): """ diff --git a/synapse/storage/schema/delta/56/redaction_censor.sql b/synapse/storage/schema/delta/56/redaction_censor.sql new file mode 100644 index 0000000000..fe51b02309 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE redactions ADD COLUMN have_censored BOOL NOT NULL DEFAULT false; +CREATE INDEX redactions_have_censored ON redactions(event_id) WHERE not have_censored; -- cgit 1.4.1 From 3ff0422d2dbfa668df365da99a4b7caeea85528d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Sep 2019 17:16:03 +0100 Subject: Make redaction retention period configurable --- docs/sample_config.yaml | 5 +++++ synapse/config/server.py | 15 +++++++++++++++ synapse/storage/events.py | 6 ++++-- tests/storage/test_redaction.py | 4 +++- 4 files changed, 27 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 43969bbb70..e23b80d2b8 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -306,6 +306,11 @@ listeners: # #allow_per_room_profiles: false +# How long to keep redacted events in unredacted form in the database. +# By default redactions are kept indefinitely. +# +#redaction_retention_period: 30d + ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index 2abdef0971..8efab924d4 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -162,6 +162,16 @@ class ServerConfig(Config): self.mau_trial_days = config.get("mau_trial_days", 0) + # How long to keep redacted events in the database in unredacted form + # before redacting them. 
+ redaction_retention_period = config.get("redaction_retention_period") + if redaction_retention_period: + self.redaction_retention_period = self.parse_duration( + redaction_retention_period + ) + else: + self.redaction_retention_period = None + # Options to disable HS self.hs_disabled = config.get("hs_disabled", False) self.hs_disabled_message = config.get("hs_disabled_message", "") @@ -718,6 +728,11 @@ class ServerConfig(Config): # Defaults to 'true'. # #allow_per_room_profiles: false + + # How long to keep redacted events in unredacted form in the database. + # By default redactions are kept indefinitely. + # + #redaction_retention_period: 30d """ % locals() ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2970da6829..d0d1781c90 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1566,10 +1566,12 @@ class EventsStore( Deferred """ - if self.stream_ordering_month_ago is None: + if not self.hs.config.redaction_retention_period: return - max_pos = self.stream_ordering_month_ago + max_pos = yield self.find_first_stream_ordering_after_ts( + self._clock.time_msec() - self.hs.config.redaction_retention_period + ) # We fetch all redactions that point to an event that we have that has # a stream ordering from over a month ago, that we haven't yet censored diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 0c9f3c7071..f0e86d41a8 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -344,7 +344,9 @@ class RedactionTestCase(unittest.HomeserverTestCase): {"content": {"body": "t", "msgtype": "message"}}, json.loads(event_json) ) - # Advance by 30 days + # Advance by 30 days, then advance again to ensure that the looping call + # for updating the stream position gets called and then the looping call + # for the censoring gets called. 
self.reactor.advance(60 * 60 * 24 * 31) self.reactor.advance(60 * 60 * 2) -- cgit 1.4.1 From 05bae6b4fc97943b3738bac3175da1bc49f13512 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 10:13:14 +0100 Subject: Add opentracing span for HTTP push --- synapse/push/httppusher.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index bd5d53af91..6299587808 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -22,6 +22,7 @@ from prometheus_client import Counter from twisted.internet import defer from twisted.internet.error import AlreadyCalled, AlreadyCancelled +from synapse.logging import opentracing from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import PusherConfigException @@ -194,7 +195,17 @@ class HttpPusher(object): ) for push_action in unprocessed: - processed = yield self._process_one(push_action) + with opentracing.start_active_span( + "http-push", + tags={ + "authenticated_entity": self.user_id, + "event_id": push_action["event_id"], + "app_id": self.app_id, + "app_display_name": self.app_display_name, + }, + ): + processed = yield self._process_one(push_action) + if processed: http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC -- cgit 1.4.1 From be618e055178f4aa9865ab426182218312bed07f Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 14:43:51 +0300 Subject: Only count real users when checking for auto-creation of auto-join room Previously if the first registered user was a "support" or "bot" user, when the first real user registers, the auto-join rooms were not created. Fix to exclude non-real (ie users with a special user type) users when counting how many users there are to determine whether we should auto-create a room. Signed-off-by: Jason Robinson --- changelog.d/6004.bugfix | 1 + synapse/handlers/register.py | 12 ++++-------- synapse/storage/registration.py | 39 +++++++++++++++++++++++++++++++++++++++ tests/handlers/test_register.py | 29 +++++++++++++++++++++++++++-- 4 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 changelog.d/6004.bugfix (limited to 'synapse') diff --git a/changelog.d/6004.bugfix b/changelog.d/6004.bugfix new file mode 100644 index 0000000000..45c179c8fd --- /dev/null +++ b/changelog.d/6004.bugfix @@ -0,0 +1 @@ +Only count real users when checking for auto-creation of auto-join room. diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 975da57ffd..06bd03b77c 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -275,16 +275,12 @@ class RegistrationHandler(BaseHandler): fake_requester = create_requester(user_id) # try to create the room if we're the first real user on the server. Note - # that an auto-generated support user is not a real user and will never be + # that an auto-generated support or bot user is not a real user and will never be # the user to create the room should_auto_create_rooms = False - is_support = yield self.store.is_support_user(user_id) - # There is an edge case where the first user is the support user, then - # the room is never created, though this seems unlikely and - # recoverable from given the support user being involved in the first - # place. 
- if self.hs.config.autocreate_auto_join_rooms and not is_support: - count = yield self.store.count_all_users() + is_real_user = yield self.store.is_real_user(user_id) + if self.hs.config.autocreate_auto_join_rooms and is_real_user: + count = yield self.store.count_real_users() should_auto_create_rooms = count == 1 for r in self.hs.config.auto_join_rooms: logger.info("Auto-joining %s to %s", user_id, r) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 5138792a5f..b054d86ae0 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -322,6 +322,21 @@ class RegistrationWorkerStore(SQLBaseStore): return None + @cachedInlineCallbacks() + def is_real_user(self, user_id): + """Determines if the user is a real user, ie does not have a 'user_type'. + + Args: + user_id (str): user id to test + + Returns: + Deferred[bool]: True if user 'user_type' is null or empty string + """ + res = yield self.runInteraction( + "is_real_user", self.is_real_user_txn, user_id + ) + return res + @cachedInlineCallbacks() def is_support_user(self, user_id): """Determines if the user is of type UserTypes.SUPPORT @@ -337,6 +352,16 @@ class RegistrationWorkerStore(SQLBaseStore): ) return res + def is_real_user_txn(self, txn, user_id): + res = self._simple_select_one_onecol_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + retcol="user_type", + allow_none=True, + ) + return True if res is None or res == "" else False + def is_support_user_txn(self, txn, user_id): res = self._simple_select_one_onecol_txn( txn=txn, @@ -421,6 +446,20 @@ class RegistrationWorkerStore(SQLBaseStore): ret = yield self.runInteraction("count_users", _count_users) return ret + @defer.inlineCallbacks + def count_real_users(self): + """Counts all users without a special user_type registered on the homeserver.""" + + def _count_users(txn): + txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''") + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_real_users", _count_users) + return ret + @defer.inlineCallbacks def find_next_generated_user_id_localpart(self): """ diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index e10296a5e4..1e9ba3a201 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -171,11 +171,11 @@ class RegistrationTestCase(unittest.HomeserverTestCase): rooms = self.get_success(self.store.get_rooms_for_user(user_id)) self.assertEqual(len(rooms), 0) - def test_auto_create_auto_join_rooms_when_support_user_exists(self): + def test_auto_create_auto_join_rooms_when_user_is_not_a_real_user(self): room_alias_str = "#room:test" self.hs.config.auto_join_rooms = [room_alias_str] - self.store.is_support_user = Mock(return_value=True) + self.store.is_real_user = Mock(return_value=False) user_id = self.get_success(self.handler.register_user(localpart="support")) rooms = self.get_success(self.store.get_rooms_for_user(user_id)) self.assertEqual(len(rooms), 0) @@ -183,6 +183,31 @@ class RegistrationTestCase(unittest.HomeserverTestCase): room_alias = RoomAlias.from_string(room_alias_str) self.get_failure(directory_handler.get_association(room_alias), SynapseError) + def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.count_real_users = Mock(return_value=1) + self.store.is_real_user = 
Mock(return_value=True) + user_id = self.get_success(self.handler.register_user(localpart="real")) + rooms = self.get_success(self.store.get_rooms_for_user(user_id)) + directory_handler = self.hs.get_handlers().directory_handler + room_alias = RoomAlias.from_string(room_alias_str) + room_id = self.get_success(directory_handler.get_association(room_alias)) + + self.assertTrue(room_id["room_id"] in rooms) + self.assertEqual(len(rooms), 1) + + def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.count_real_users = Mock(return_value=2) + self.store.is_real_user = Mock(return_value=True) + user_id = self.get_success(self.handler.register_user(localpart="real")) + rooms = self.get_success(self.store.get_rooms_for_user(user_id)) + self.assertEqual(len(rooms), 0) + def test_auto_create_auto_join_where_no_consent(self): """Test to ensure that the first user is not auto-joined to a room if they have not given general consent. -- cgit 1.4.1 From 62fac9d969cea98694093a5f80bed6bdd4848968 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 14:59:35 +0300 Subject: Auto-fix a few code style issues Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index b054d86ae0..9387b29503 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -332,9 +332,7 @@ class RegistrationWorkerStore(SQLBaseStore): Returns: Deferred[bool]: True if user 'user_type' is null or empty string """ - res = yield self.runInteraction( - "is_real_user", self.is_real_user_txn, user_id - ) + res = yield self.runInteraction("is_real_user", self.is_real_user_txn, user_id) return res @cachedInlineCallbacks() @@ -451,7 +449,9 @@ class RegistrationWorkerStore(SQLBaseStore): """Counts all users without a special user_type registered on the homeserver.""" def _count_users(txn): - txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''") + txn.execute( + "SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''" + ) rows = self.cursor_to_dict(txn) if rows: return rows[0]["users"] -- cgit 1.4.1 From 80e14a8546efb9e2f9edec3b1de0a8b943351252 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:23:41 +0100 Subject: Handle setting retention period to 0 --- synapse/config/server.py | 2 +- synapse/storage/events.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/config/server.py b/synapse/config/server.py index 8efab924d4..aa71835dc3 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -165,7 +165,7 @@ class ServerConfig(Config): # How long to keep redacted events in the database in unredacted form # before redacting them. 
redaction_retention_period = config.get("redaction_retention_period") - if redaction_retention_period: + if redaction_retention_period is not None: self.redaction_retention_period = self.parse_duration( redaction_retention_period ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a5d13ddc49..77ba7eb2af 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1566,7 +1566,7 @@ class EventsStore( Deferred """ - if not self.hs.config.redaction_retention_period: + if self.hs.config.redaction_retention_period is None: return max_pos = yield self.find_first_stream_ordering_after_ts( -- cgit 1.4.1 From fffe17b77d06927aaf64fa80be5b765c870a4ef5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:24:24 +0100 Subject: Don't start looping call unless enabled --- synapse/storage/events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 77ba7eb2af..9ef7aefd97 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -268,7 +268,8 @@ class EventsStore( "_censor_redactions", self._censor_redactions ) - hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) + if self.hs.config.redaction_retention_period is not None: + hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) @defer.inlineCallbacks def _read_forward_extremities(self): -- cgit 1.4.1 From 916c69722833dd94c53d0fedeec8cc42d2085e73 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:31:00 +0100 Subject: Fixup comment --- synapse/storage/events.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 9ef7aefd97..4484ae7ce0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -269,7 +269,7 @@ class EventsStore( ) if self.hs.config.redaction_retention_period is not None: - hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) + hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000) @defer.inlineCallbacks def _read_forward_extremities(self): @@ -1574,9 +1574,17 @@ class EventsStore( self._clock.time_msec() - self.hs.config.redaction_retention_period ) - # We fetch all redactions that point to an event that we have that has - # a stream ordering from over a month ago, that we haven't yet censored - # in the DB. + # We fetch all redactions that: + # 1. point to an event we have that has, + # 2. has a stream ordering from before the cut off, and + # 3. we haven't yet censored. + # + # This is limited to 100 events to ensure that we don't try and do too + # much at once. We'll get called again so this should eventually catch + # up. + # + # We use the range [-max_pos, max_pos] to handle backfilled events, + # which are given negative stream ordering. 
sql = """ SELECT er.event_id, redacts FROM redactions INNER JOIN events AS er USING (event_id) -- cgit 1.4.1 From e7184a437062ae21846b8e071ded73526209e90c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:33:38 +0100 Subject: Use better names in SQL --- synapse/storage/events.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 4484ae7ce0..0da6e0b1a1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1586,12 +1586,15 @@ class EventsStore( # We use the range [-max_pos, max_pos] to handle backfilled events, # which are given negative stream ordering. sql = """ - SELECT er.event_id, redacts FROM redactions - INNER JOIN events AS er USING (event_id) - INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id) + SELECT redact_event.event_id, redacts FROM redactions + INNER JOIN events AS redact_event USING (event_id) + INNER JOIN events AS original_event ON ( + redact_event.room_id = original_event.room_id + AND redacts = original_event.event_id + ) WHERE NOT have_censored - AND ? <= er.stream_ordering AND er.stream_ordering <= ? - ORDER BY er.stream_ordering ASC + AND ? <= redact_event.stream_ordering AND redact_event.stream_ordering <= ? + ORDER BY redact_event.stream_ordering ASC LIMIT ? """ -- cgit 1.4.1 From 8b9ade8c7871c862cf2122a156f00e411cd7a276 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:40:05 +0100 Subject: Default to censoring redactions after seven days --- docs/sample_config.yaml | 8 +++++--- synapse/config/server.py | 10 ++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index e23b80d2b8..24adc3da2f 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -306,10 +306,12 @@ listeners: # #allow_per_room_profiles: false -# How long to keep redacted events in unredacted form in the database. -# By default redactions are kept indefinitely. +# How long to keep redacted events in unredacted form in the database. After +# this period redacted events get replaced with their redacted form in the DB. # -#redaction_retention_period: 30d +# Defaults to `7d`. Set to `null` to disable. +# +redaction_retention_period: 7d ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index aa71835dc3..c8b9fe2d0f 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -164,7 +164,7 @@ class ServerConfig(Config): # How long to keep redacted events in the database in unredacted form # before redacting them. - redaction_retention_period = config.get("redaction_retention_period") + redaction_retention_period = config.get("redaction_retention_period", "7d") if redaction_retention_period is not None: self.redaction_retention_period = self.parse_duration( redaction_retention_period @@ -729,10 +729,12 @@ class ServerConfig(Config): # #allow_per_room_profiles: false - # How long to keep redacted events in unredacted form in the database. - # By default redactions are kept indefinitely. + # How long to keep redacted events in unredacted form in the database. After + # this period redacted events get replaced with their redacted form in the DB. # - #redaction_retention_period: 30d + # Defaults to `7d`. Set to `null` to disable. 
+ # + redaction_retention_period: 7d """ % locals() ) -- cgit 1.4.1 From 8c03cd0e5f73fb59ee773dc6cce77f2dc4dab827 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 16:40:40 +0300 Subject: Simplify is_real_user_txn check to trust user_type is null if real user Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9387b29503..54b0846c54 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -358,7 +358,7 @@ class RegistrationWorkerStore(SQLBaseStore): retcol="user_type", allow_none=True, ) - return True if res is None or res == "" else False + return res is None def is_support_user_txn(self, txn, user_id): res = self._simple_select_one_onecol_txn( -- cgit 1.4.1 From e89fea4f04c6fc7df41c5cade63609b513a98073 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 16:43:32 +0300 Subject: Simplify count_real_users SQL to only count user_type is null rows Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 54b0846c54..c0ca25733b 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -450,7 +450,7 @@ class RegistrationWorkerStore(SQLBaseStore): def _count_users(txn): txn.execute( - "SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''" + "SELECT COUNT(*) AS users FROM users where user_type is null" ) rows = self.cursor_to_dict(txn) if rows: -- cgit 1.4.1 From 580f3df9b2573c0278dd952d1478689e5cd23a7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 15:08:24 +0100 Subject: Fix comments --- synapse/storage/events.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 0da6e0b1a1..ddf7ab6479 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1559,7 +1559,8 @@ class EventsStore( @defer.inlineCallbacks def _censor_redactions(self): - """Censors all redactions older than a month that haven't been censored. + """Censors all redactions older than the configured period that haven't + been censored yet. By censor we mean update the event_json table with the redacted event. @@ -1575,7 +1576,7 @@ class EventsStore( ) # We fetch all redactions that: - # 1. point to an event we have that has, + # 1. point to an event we have, # 2. has a stream ordering from before the cut off, and # 3. we haven't yet censored. 
# -- cgit 1.4.1 From aaed6b39e140195a0f2b48e4de0519e08f16a119 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 17:10:02 +0300 Subject: Fix code style, again Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index c0ca25733b..109052fa41 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -449,9 +449,7 @@ class RegistrationWorkerStore(SQLBaseStore): """Counts all users without a special user_type registered on the homeserver.""" def _count_users(txn): - txn.execute( - "SELECT COUNT(*) AS users FROM users where user_type is null" - ) + txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null") rows = self.cursor_to_dict(txn) if rows: return rows[0]["users"] -- cgit 1.4.1 From aeb9b2179eaa4b468bec937570d3ac7de7ccaaea Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 10 Sep 2019 00:14:58 +1000 Subject: Add a build info metric to Prometheus (#6005) --- changelog.d/6005.feature | 1 + synapse/metrics/__init__.py | 12 ++++++++++++ tests/test_metrics.py | 22 ++++++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6005.feature (limited to 'synapse') diff --git a/changelog.d/6005.feature b/changelog.d/6005.feature new file mode 100644 index 0000000000..ed6491d3e4 --- /dev/null +++ b/changelog.d/6005.feature @@ -0,0 +1 @@ +The new Prometheus metric `synapse_build_info` exposes the Python version, OS version, and Synapse version of the running server. diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 488280b4a6..b5c9595cb9 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -29,11 +29,13 @@ from prometheus_client.core import REGISTRY, GaugeMetricFamily, HistogramMetricF from twisted.internet import reactor +import synapse from synapse.metrics._exposition import ( MetricsResource, generate_latest, start_http_server, ) +from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) @@ -385,6 +387,16 @@ event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name" # finished being processed. event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"]) +# Build info of the running server. +build_info = Gauge( + "synapse_build_info", "Build information", ["pythonversion", "version", "osversion"] +) +build_info.labels( + " ".join([platform.python_implementation(), platform.python_version()]), + get_version_string(synapse), + " ".join([platform.system(), platform.release()]), +).set(1) + last_ticked = time.time() diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 2edbae5c6d..270f853d60 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd +# Copyright 2019 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- -from synapse.metrics import InFlightGauge +from synapse.metrics import REGISTRY, InFlightGauge, generate_latest from tests import unittest @@ -111,3 +111,21 @@ class TestMauLimit(unittest.TestCase): } return results + + +class BuildInfoTests(unittest.TestCase): + def test_get_build(self): + """ + The synapse_build_info metric reports the OS version, Python version, + and Synapse version. + """ + items = list( + filter( + lambda x: b"synapse_build_info{" in x, + generate_latest(REGISTRY).split(b"\n"), + ) + ) + self.assertEqual(len(items), 1) + self.assertTrue(b"osversion=" in items[0]) + self.assertTrue(b"pythonversion=" in items[0]) + self.assertTrue(b"version=" in items[0]) -- cgit 1.4.1 From 60d3c57bd0c977cbe6b7585a2c1517cc4e2c16dd Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 10 Sep 2019 15:57:32 +0100 Subject: Use account_threepid_delegate for 3pid validation --- synapse/handlers/auth.py | 11 ++++- synapse/handlers/identity.py | 73 +++++++++++++++------------------ synapse/rest/client/v2_alpha/account.py | 3 +- 3 files changed, 45 insertions(+), 42 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index d0c0142740..374372b69e 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -444,7 +444,16 @@ class AuthHandler(BaseHandler): logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - threepid = yield identity_handler.threepid_from_creds(threepid_creds) + if medium == "email": + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + elif medium == "msisdn": + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + else: + raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,)) elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: row = yield self.store.get_threepid_validation_session( medium, diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 71b5a87392..2dfb79fde1 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -75,59 +75,52 @@ class IdentityHandler(BaseHandler): return client_secret, id_server, id_access_token @defer.inlineCallbacks - def threepid_from_creds(self, creds, use_v2=True): + def threepid_from_creds(self, id_server, creds): """ - Retrieve and validate a threepid identitier from a "credentials" dictionary + Retrieve and validate a threepid identifier from a "credentials" dictionary against a + given identity server Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: + id_server (str|None): The identity server to validate 3PIDs against. If None, + we will attempt to extract id_server creds + + creds (dict[str, str]): Dictionary containing the following key: + * id_server: An optional domain name of an identity server * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. 
Required if use_v2 is true - use_v2 (bool): Whether to use v2 Identity Service API endpoints + * sid: The ID of the validation session Returns: Deferred[dict[str,str|int]|None]: A dictionary consisting of response params to the /getValidated3pid endpoint of the Identity Service API, or None if the threepid was not found """ - client_secret, id_server, id_access_token = self._extract_items_from_creds_dict( - creds - ) - - # If an id_access_token is not supplied, force usage of v1 - if id_access_token is None: - use_v2 = False - - query_params = {"sid": creds["sid"], "client_secret": client_secret} - - # Decide which API endpoint URLs and query parameters to use - if use_v2: - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/v2/3pid/getValidated3pid", + client_secret = creds.get("client_secret") or creds.get("clientSecret") + if not client_secret: + raise SynapseError( + 400, "Missing param client_secret in creds", errcode=Codes.MISSING_PARAM ) - query_params["id_access_token"] = id_access_token - else: - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/api/v1/3pid/getValidated3pid", + session_id = creds.get("sid") + if not session_id: + raise SynapseError( + 400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM ) + if not id_server: + # Attempt to get the id_server from the creds dict + id_server = creds.get("id_server") + if not id_server: + raise SynapseError( + 400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM + ) + + query_params = {"sid": session_id, "client_secret": client_secret} + + url = "https://%s%s" % ( + id_server, + "/_matrix/identity/api/v1/3pid/getValidated3pid", + ) - try: - data = yield self.http_client.get_json(url, query_params) - return data if "medium" in data else None - except HttpResponseException as e: - if e.code != 404 or not use_v2: - # Generic failure - logger.info("getValidated3pid failed with Matrix error: %r", e) - raise e.to_synapse_error() - - # This identity server is too old to understand Identity Service API v2 - # Attempt v1 endpoint - logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", url) - return (yield self.threepid_from_creds(creds, use_v2=False)) + data = yield self.http_client.get_json(url, query_params) + return data if "medium" in data else None @defer.inlineCallbacks def bind_threepid(self, creds, mxid, use_v2=True): diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 785d01ea52..94a8fec8f7 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -523,7 +523,8 @@ class ThreepidRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - threepid = yield self.identity_handler.threepid_from_creds(threepid_creds) + # Retrieve the identity server from the request + threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds) if not threepid: raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED) -- cgit 1.4.1 From b5833a2abf788a4144602c3e0de15d371608094b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 10 Sep 2019 17:43:57 +0100 Subject: Add changelog --- changelog.d/6011.feature | 1 + synapse/handlers/identity.py | 6 +++--- synapse/rest/client/v2_alpha/account.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6011.feature (limited to 'synapse') diff --git a/changelog.d/6011.feature b/changelog.d/6011.feature new file mode 100644 index 
0000000000..ad16acb12b --- /dev/null +++ b/changelog.d/6011.feature @@ -0,0 +1 @@ +Use account_threepid_delegate.email and account_threepid_delegate.msisdn for validating threepid sessions. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 2dfb79fde1..f6d1d1717e 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -84,8 +84,8 @@ class IdentityHandler(BaseHandler): id_server (str|None): The identity server to validate 3PIDs against. If None, we will attempt to extract id_server creds - creds (dict[str, str]): Dictionary containing the following key: - * id_server: An optional domain name of an identity server + creds (dict[str, str]): Dictionary containing the following keys: + * id_server|idServer: An optional domain name of an identity server * client_secret|clientSecret: A unique secret str provided by the client * sid: The ID of the validation session @@ -106,7 +106,7 @@ class IdentityHandler(BaseHandler): ) if not id_server: # Attempt to get the id_server from the creds dict - id_server = creds.get("id_server") + id_server = creds.get("id_server") or creds.get("idServer") if not id_server: raise SynapseError( 400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 94a8fec8f7..2ea515d2f6 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -523,7 +523,7 @@ class ThreepidRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - # Retrieve the identity server from the request + # Specify None as the identity server to retrieve it from the request body instead threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds) if not threepid: -- cgit 1.4.1 From cd17a2085eb517d24c68e33cd3906375a8baeb3b Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 10:37:17 +0100 Subject: Remove origin parameter from add_display_name_to_third_party_invite and add params to docstring (#6010) Another small fixup noticed during work on a larger PR. The `origin` field of `add_display_name_to_third_party_invite` is not used and likely was just carried over from the `on_PUT` method of `FederationThirdPartyInviteExchangeServlet` which, like all other servlets, provides an `origin` argument. Since it's not used anywhere in the handler function though, we should remove it from the function arguments. --- changelog.d/6010.misc | 1 + synapse/federation/federation_server.py | 4 ++-- synapse/federation/transport/server.py | 2 +- synapse/handlers/federation.py | 7 ++++++- 4 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6010.misc (limited to 'synapse') diff --git a/changelog.d/6010.misc b/changelog.d/6010.misc new file mode 100644 index 0000000000..0659f12ebd --- /dev/null +++ b/changelog.d/6010.misc @@ -0,0 +1 @@ +Remove unused `origin` argument on FederationHandler.add_display_name_to_third_party_invite. 
\ No newline at end of file diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e5f0b90aec..da06ab379d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -669,9 +669,9 @@ class FederationServer(FederationBase): return ret @defer.inlineCallbacks - def on_exchange_third_party_invite_request(self, origin, room_id, event_dict): + def on_exchange_third_party_invite_request(self, room_id, event_dict): ret = yield self.handler.on_exchange_third_party_invite_request( - origin, room_id, event_dict + room_id, event_dict ) return ret diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 132a8fb5e6..7dc696c7ae 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -575,7 +575,7 @@ class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): async def on_PUT(self, origin, content, query, room_id): content = await self.handler.on_exchange_third_party_invite_request( - origin, room_id, content + room_id, content ) return 200, content diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 538b16efd6..f72b81d419 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2530,12 +2530,17 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks @log_function - def on_exchange_third_party_invite_request(self, origin, room_id, event_dict): + def on_exchange_third_party_invite_request(self, room_id, event_dict): """Handle an exchange_third_party_invite request from a remote server The remote server will call this when it wants to turn a 3pid invite into a normal m.room.member invite. + Args: + room_id (str): The ID of the room. + + event_dict (dict[str, Any]): Dictionary containing the event body. + Returns: Deferred: resolves (to None) """ -- cgit 1.4.1 From 54ce81c86d163b883df67b97540426759a9f6363 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 10:46:38 +0100 Subject: Allow use of different ratelimits for admin redactions. This is useful to allow room admins to quickly deal with a large number of abusive messages. --- synapse/config/ratelimiting.py | 13 +++++++++++++ synapse/handlers/_base.py | 43 +++++++++++++++++++++++++++++++----------- synapse/handlers/message.py | 8 +++++++- synapse/server.py | 4 ++++ 4 files changed, 56 insertions(+), 12 deletions(-) (limited to 'synapse') diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 33f31cf213..b4df6612d6 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -80,6 +80,12 @@ class RatelimitConfig(Config): "federation_rr_transactions_per_room_per_second", 50 ) + rc_admin_redaction = config.get("rc_admin_redaction") + if rc_admin_redaction: + self.rc_admin_redaction = RateLimitConfig(rc_admin_redaction) + else: + self.rc_admin_redaction = None + def generate_config_section(self, **kwargs): return """\ ## Ratelimiting ## @@ -102,6 +108,9 @@ class RatelimitConfig(Config): # - one for login that ratelimits login requests based on the account the # client is attempting to log into, based on the amount of failed login # attempts for this account. + # - one for ratelimiting redactions by room admins. If this is not explicitly + # set then it uses the same ratelimiting as per rc_message. This is useful + # to allow room admins to quickly deal with abuse quickly. # # The defaults are as shown below. 
# @@ -123,6 +132,10 @@ class RatelimitConfig(Config): # failed_attempts: # per_second: 0.17 # burst_count: 3 + # + #rc_admin_redaction: + # per_second: 1 + # burst_count: 50 # Ratelimiting settings for incoming federation diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index c29c78bd65..853b72d8e7 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -45,6 +45,7 @@ class BaseHandler(object): self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() self.ratelimiter = hs.get_ratelimiter() + self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter() self.clock = hs.get_clock() self.hs = hs @@ -53,7 +54,7 @@ class BaseHandler(object): self.event_builder_factory = hs.get_event_builder_factory() @defer.inlineCallbacks - def ratelimit(self, requester, update=True): + def ratelimit(self, requester, update=True, is_admin_redaction=False): """Ratelimits requests. Args: @@ -62,6 +63,9 @@ class BaseHandler(object): Set to False when doing multiple checks for one request (e.g. to check up front if we would reject the request), and set to True for the last call for a given request. + is_admin_redaction (bool): Whether this is a room admin/moderator + redacting an event. If so then we may apply different + ratelimits depending on config. Raises: LimitExceededError if the request should be ratelimited @@ -90,16 +94,33 @@ class BaseHandler(object): messages_per_second = override.messages_per_second burst_count = override.burst_count else: - messages_per_second = self.hs.config.rc_message.per_second - burst_count = self.hs.config.rc_message.burst_count - - allowed, time_allowed = self.ratelimiter.can_do_action( - user_id, - time_now, - rate_hz=messages_per_second, - burst_count=burst_count, - update=update, - ) + # We default to different values if this is an admin redaction and + # the config is set + if is_admin_redaction and self.hs.config.rc_admin_redaction: + messages_per_second = self.hs.config.rc_admin_redaction.per_second + burst_count = self.hs.config.rc_admin_redaction.burst_count + else: + messages_per_second = self.hs.config.rc_message.per_second + burst_count = self.hs.config.rc_message.burst_count + + if is_admin_redaction and self.hs.config.rc_admin_redaction: + # If we have separate config for admin redactions we use a separate + # ratelimiter. 
+ allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action( + user_id, + time_now, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) + else: + allowed, time_allowed = self.ratelimiter.can_do_action( + user_id, + time_now, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) if not allowed: raise LimitExceededError( retry_after_ms=int(1000 * (time_allowed - time_now)) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 111f7c7e2f..184170ef8b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -729,7 +729,13 @@ class EventCreationHandler(object): assert not self.config.worker_app if ratelimit: - yield self.base_handler.ratelimit(requester) + is_admin_redaction = ( + event.type == EventTypes.Redaction + and event.sender != requester.user.to_string() + ) + yield self.base_handler.ratelimit( + requester, is_admin_redaction=is_admin_redaction + ) yield self.base_handler.maybe_kick_guest_users(event, context) diff --git a/synapse/server.py b/synapse/server.py index 9e28dba2b1..1fcc7375d3 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -221,6 +221,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.admin_redaction_ratelimiter = Ratelimiter() self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -279,6 +280,9 @@ class HomeServer(object): def get_registration_ratelimiter(self): return self.registration_ratelimiter + def get_admin_redaction_ratelimiter(self): + return self.admin_redaction_ratelimiter + def build_federation_client(self): return FederationClient(self) -- cgit 1.4.1 From c64c3bb4c5b740e3f505708bc5dde0b5b29de6b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 11:16:17 +0100 Subject: Fix how we check for self redaction --- synapse/handlers/message.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 184170ef8b..f975909416 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -729,10 +729,24 @@ class EventCreationHandler(object): assert not self.config.worker_app if ratelimit: - is_admin_redaction = ( - event.type == EventTypes.Redaction - and event.sender != requester.user.to_string() - ) + # We check if this is a room admin redacting an event so that we + # can apply different ratelimiting. We do this by simply checking + # its not a self-redaction (to avoid having to look up whether the + # user is actually admin or not). + is_admin_redaction = False + if event.type == EventTypes.Redaction: + original_event = yield self.store.get_event( + event.redacts, + check_redacted=False, + get_prev_content=False, + allow_rejected=False, + allow_none=True, + ) + + is_admin_redaction = ( + original_event and event.sender != original_event.sender + ) + yield self.base_handler.ratelimit( requester, is_admin_redaction=is_admin_redaction ) -- cgit 1.4.1 From 3505ffcda7d04a9c0100ff423a2239d1e6340fd0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 11:59:45 +0100 Subject: Fix existing v2 identity server calls (MSC2140) (#6013) Two things I missed while implementing [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140/files#diff-c03a26de5ac40fb532de19cb7fc2aaf7R80). 1. 
Access tokens should be provided to the identity server as `access_token`, not `id_access_token`, even though the homeserver may accept the tokens as `id_access_token`. 2. Access tokens must be sent to the identity server in a query parameter, the JSON body is not allowed. We now send the access token as part of an `Authorization: ...` header, which fixes both things. The breaking code was added in https://github.com/matrix-org/synapse/pull/5892 Sytest PR: https://github.com/matrix-org/sytest/pull/697 --- changelog.d/6013.misc | 1 + synapse/handlers/identity.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6013.misc (limited to 'synapse') diff --git a/changelog.d/6013.misc b/changelog.d/6013.misc new file mode 100644 index 0000000000..939fe8c655 --- /dev/null +++ b/changelog.d/6013.misc @@ -0,0 +1 @@ +Compatibility with v2 Identity Service APIs other than /lookup. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index f0549666c3..f690fd04a3 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -74,6 +74,25 @@ class IdentityHandler(BaseHandler): id_access_token = creds.get("id_access_token") return client_secret, id_server, id_access_token + def create_id_access_token_header(self, id_access_token): + """Create an Authorization header for passing to SimpleHttpClient as the header value + of an HTTP request. + + Args: + id_access_token (str): An identity server access token. + + Returns: + list[str]: The ascii-encoded bearer token encased in a list. + """ + # Prefix with Bearer + bearer_token = "Bearer %s" % id_access_token + + # Encode headers to standard ascii + bearer_token.encode("ascii") + + # Return as a list as that's how SimpleHttpClient takes header values + return [bearer_token] + @defer.inlineCallbacks def threepid_from_creds(self, id_server, creds): """ @@ -155,15 +174,20 @@ class IdentityHandler(BaseHandler): use_v2 = False # Decide which API endpoint URLs to use + headers = {} bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - bind_data["id_access_token"] = id_access_token + headers["Authorization"] = self.create_id_access_token_header( + id_access_token + ) else: bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) try: - data = yield self.http_client.post_json_get_json(bind_url, bind_data) + data = yield self.http_client.post_json_get_json( + bind_url, bind_data, headers=headers + ) logger.debug("bound threepid %r to %s", creds, mxid) # Remember where we bound the threepid -- cgit 1.4.1 From 57dd41a45b4df5d736e2f30d40926b60f367b500 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 13:54:50 +0100 Subject: Fix comments Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/config/ratelimiting.py | 2 +- synapse/handlers/_base.py | 2 +- synapse/handlers/message.py | 2 +- tests/rest/client/test_redactions.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index b4df6612d6..587e2862b7 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -110,7 +110,7 @@ class RatelimitConfig(Config): # attempts for this account. # - one for ratelimiting redactions by room admins. 
If this is not explicitly # set then it uses the same ratelimiting as per rc_message. This is useful - # to allow room admins to quickly deal with abuse quickly. + # to allow room admins to deal with abuse quickly. # # The defaults are as shown below. # diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 853b72d8e7..d15c6282fb 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -105,7 +105,7 @@ class BaseHandler(object): if is_admin_redaction and self.hs.config.rc_admin_redaction: # If we have separate config for admin redactions we use a separate - # ratelimiter. + # ratelimiter allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action( user_id, time_now, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f975909416..1f8272784e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -731,7 +731,7 @@ class EventCreationHandler(object): if ratelimit: # We check if this is a room admin redacting an event so that we # can apply different ratelimiting. We do this by simply checking - # its not a self-redaction (to avoid having to look up whether the + # it's not a self-redaction (to avoid having to look up whether the # user is actually admin or not). is_admin_redaction = False if event.type == EventTypes.Redaction: diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py index 1b1e991c42..d2bcf256fa 100644 --- a/tests/rest/client/test_redactions.py +++ b/tests/rest/client/test_redactions.py @@ -197,8 +197,8 @@ class RedactionsTestCase(HomeserverTestCase): message_ids.append(b["event_id"]) self.reactor.advance(10) # To get around ratelimits - # as the moderator, send a bunch of redactions redaction + # as the moderator, send a bunch of redactions for msg_id in message_ids: # These should all succeed, even though this would be denied by - # standard message ratelimiter + # the standard message ratelimiter self._redact_event(self.mod_access_token, self.room_id, msg_id) -- cgit 1.4.1 From 6604b64fae970f534d3e2a61f2fbbe51599fa26d Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Wed, 11 Sep 2019 14:00:37 +0100 Subject: Check dependencies on setup in the nicer way. (#5989) --- changelog.d/5989.misc | 1 + synapse/config/metrics.py | 12 +++++------- synapse/config/repository.py | 27 +++++++-------------------- synapse/python_dependencies.py | 8 +++++++- 4 files changed, 20 insertions(+), 28 deletions(-) create mode 100644 changelog.d/5989.misc (limited to 'synapse') diff --git a/changelog.d/5989.misc b/changelog.d/5989.misc new file mode 100644 index 0000000000..9f2525fd3e --- /dev/null +++ b/changelog.d/5989.misc @@ -0,0 +1 @@ +Clean up dependency checking at setup. diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 653b990e67..9eb1e55ddb 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -16,11 +16,9 @@ import attr -from ._base import Config, ConfigError +from synapse.python_dependencies import DependencyException, check_requirements -MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry - integration. 
- """ +from ._base import Config, ConfigError @attr.s @@ -51,9 +49,9 @@ class MetricsConfig(Config): self.sentry_enabled = "sentry" in config if self.sentry_enabled: try: - import sentry_sdk # noqa F401 - except ImportError: - raise ConfigError(MISSING_SENTRY) + check_requirements("sentry") + except DependencyException as e: + raise ConfigError(e.message) self.sentry_dsn = config["sentry"].get("dsn") if not self.sentry_dsn: diff --git a/synapse/config/repository.py b/synapse/config/repository.py index fdb1f246d0..34f1a9a92d 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -16,6 +16,7 @@ import os from collections import namedtuple +from synapse.python_dependencies import DependencyException, check_requirements from synapse.util.module_loader import load_module from ._base import Config, ConfigError @@ -34,17 +35,6 @@ THUMBNAIL_SIZE_YAML = """\ # method: %(method)s """ -MISSING_NETADDR = "Missing netaddr library. This is required for URL preview API." - -MISSING_LXML = """Missing lxml library. This is required for URL preview API. - - Install by running: - pip install lxml - - Requires libxslt1-dev system package. - """ - - ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) @@ -171,16 +161,10 @@ class ContentRepositoryConfig(Config): self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: try: - import lxml - - lxml # To stop unused lint. - except ImportError: - raise ConfigError(MISSING_LXML) + check_requirements("url_preview") - try: - from netaddr import IPSet - except ImportError: - raise ConfigError(MISSING_NETADDR) + except DependencyException as e: + raise ConfigError(e.message) if "url_preview_ip_range_blacklist" not in config: raise ConfigError( @@ -189,6 +173,9 @@ class ContentRepositoryConfig(Config): "to work" ) + # netaddr is a dependency for url_preview + from netaddr import IPSet + self.url_preview_ip_range_blacklist = IPSet( config["url_preview_ip_range_blacklist"] ) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index ec0ac547c1..07345e916a 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -147,7 +147,13 @@ def check_requirements(for_feature=None): ) except DistributionNotFound: deps_needed.append(dependency) - errors.append("Needed %s but it was not installed" % (dependency,)) + if for_feature: + errors.append( + "Needed %s for the '%s' feature but it was not installed" + % (dependency, for_feature) + ) + else: + errors.append("Needed %s but it was not installed" % (dependency,)) if not for_feature: # Check the optional dependencies are up to date. We allow them to not be -- cgit 1.4.1 From 9c555f37e30f339708dfd9a66687c4cd638aa957 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 14:23:24 +0100 Subject: Add note about extra arg to send_membership_event, remove arg in remote_reject_invite (#6009) Some small fixes to `room_member.py` found while doing other PRs. 1. Add requester to the base `_remote_reject_invite` method. 2. `send_membership_event`'s docstring was out of date and took in a `remote_room_hosts` arg that was not used and no calling function provided. 
--- changelog.d/6009.misc | 1 + synapse/handlers/room_member.py | 12 ++---------- 2 files changed, 3 insertions(+), 10 deletions(-) create mode 100644 changelog.d/6009.misc (limited to 'synapse') diff --git a/changelog.d/6009.misc b/changelog.d/6009.misc new file mode 100644 index 0000000000..fea479e1dd --- /dev/null +++ b/changelog.d/6009.misc @@ -0,0 +1 @@ +Small refactor of function arguments and docstrings in RoomMemberHandler. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 093f2ea36e..a3a3d4d143 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -100,7 +100,7 @@ class RoomMemberHandler(object): raise NotImplementedError() @abc.abstractmethod - def _remote_reject_invite(self, remote_room_hosts, room_id, target): + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): """Attempt to reject an invite for a room this server is not in. If we fail to do so we locally mark the invite as rejected. @@ -510,9 +510,7 @@ class RoomMemberHandler(object): return res @defer.inlineCallbacks - def send_membership_event( - self, requester, event, context, remote_room_hosts=None, ratelimit=True - ): + def send_membership_event(self, requester, event, context, ratelimit=True): """ Change the membership status of a user in a room. @@ -522,16 +520,10 @@ class RoomMemberHandler(object): act as the sender, will be skipped. event (SynapseEvent): The membership event. context: The context of the event. - is_guest (bool): Whether the sender is a guest. - room_hosts ([str]): Homeservers which are likely to already be in - the room, and could be danced with in order to join this - homeserver for the first time. ratelimit (bool): Whether to rate limit this request. Raises: SynapseError if there was a problem changing the membership. """ - remote_room_hosts = remote_room_hosts or [] - target_user = UserID.from_string(event.state_key) room_id = event.room_id -- cgit 1.4.1 From 7902bf1e1d6331e7964ac498988925cc26e18f79 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 11 Sep 2019 15:14:56 +0100 Subject: Clean up some code in the retry logic (#6017) * remove some unused code * make things which were constants into constants for efficiency and clarity --- changelog.d/6017.misc | 1 + synapse/storage/transactions.py | 20 -------------------- synapse/util/retryutils.py | 29 +++++++++++++---------------- 3 files changed, 14 insertions(+), 36 deletions(-) create mode 100644 changelog.d/6017.misc (limited to 'synapse') diff --git a/changelog.d/6017.misc b/changelog.d/6017.misc new file mode 100644 index 0000000000..5ccab9c6ca --- /dev/null +++ b/changelog.d/6017.misc @@ -0,0 +1 @@ +Clean up some code in the retry logic. diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b3c3bf55bc..d81ace0ece 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -250,26 +250,6 @@ class TransactionStore(SQLBaseStore): }, ) - def get_destinations_needing_retry(self): - """Get all destinations which are due a retry for sending a transaction. - - Returns: - list: A list of dicts - """ - - return self.runInteraction( - "get_destinations_needing_retry", self._get_destinations_needing_retry - ) - - def _get_destinations_needing_retry(self, txn): - query = ( - "SELECT * FROM destinations" - " WHERE retry_last_ts > 0 and retry_next_ts < ?" 
- ) - - txn.execute(query, (self._clock.time_msec(),)) - return self.cursor_to_dict(txn) - def _start_cleanup_transactions(self): return run_as_background_process( "cleanup_transactions", self._cleanup_transactions diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 0862b5ca5a..5b16a81617 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -22,6 +22,15 @@ from synapse.api.errors import CodeMessageException logger = logging.getLogger(__name__) +# the intial backoff, after the first transaction fails +MIN_RETRY_INTERVAL = 10 * 60 * 1000 + +# how much we multiply the backoff by after each subsequent fail +RETRY_MULTIPLIER = 5 + +# a cap on the backoff +MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000 + class NotRetryingDestination(Exception): def __init__(self, retry_last_ts, retry_interval, destination): @@ -112,9 +121,6 @@ class RetryDestinationLimiter(object): clock, store, retry_interval, - min_retry_interval=10 * 60 * 1000, - max_retry_interval=24 * 60 * 60 * 1000, - multiplier_retry_interval=5, backoff_on_404=False, backoff_on_failure=True, ): @@ -130,12 +136,6 @@ class RetryDestinationLimiter(object): retry_interval (int): The next retry interval taken from the database in milliseconds, or zero if the last request was successful. - min_retry_interval (int): The minimum retry interval to use after - a failed request, in milliseconds. - max_retry_interval (int): The maximum retry interval to use after - a failed request, in milliseconds. - multiplier_retry_interval (int): The multiplier to use to increase - the retry interval after a failed request. backoff_on_404 (bool): Back off if we get a 404 backoff_on_failure (bool): set to False if we should not increase the @@ -146,9 +146,6 @@ class RetryDestinationLimiter(object): self.destination = destination self.retry_interval = retry_interval - self.min_retry_interval = min_retry_interval - self.max_retry_interval = max_retry_interval - self.multiplier_retry_interval = multiplier_retry_interval self.backoff_on_404 = backoff_on_404 self.backoff_on_failure = backoff_on_failure @@ -196,13 +193,13 @@ class RetryDestinationLimiter(object): else: # We couldn't connect. if self.retry_interval: - self.retry_interval *= self.multiplier_retry_interval + self.retry_interval *= RETRY_MULTIPLIER self.retry_interval *= int(random.uniform(0.8, 1.4)) - if self.retry_interval >= self.max_retry_interval: - self.retry_interval = self.max_retry_interval + if self.retry_interval >= MAX_RETRY_INTERVAL: + self.retry_interval = MAX_RETRY_INTERVAL else: - self.retry_interval = self.min_retry_interval + self.retry_interval = MIN_RETRY_INTERVAL logger.info( "Connection to %s was unsuccessful (%s(%s)); backoff now %i", -- cgit 1.4.1 From 9fc71dc5eed7531454a34f8fec34bd451458c7c6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 16:02:42 +0100 Subject: Use the v2 Identity Service API for lookups (MSC2134 + MSC2140) (#5976) This is a redo of https://github.com/matrix-org/synapse/pull/5897 but with `id_access_token` accepted. Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus Identity Service v2 authentication ala [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140). Identity lookup-related functions were also moved from `RoomMemberHandler` to `IdentityHandler`. 
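For reference, the hashed ("sha256") lookup flow implemented here can be sketched as a standalone illustration. The values (address, pepper, access token) are made up and the helper name is ours; the real logic lives in `_lookup_3pid_v2` and the new `sha256_and_url_safe_base64` utility in the diff that follows.

    import base64
    import hashlib

    def sha256_url_safe_base64(text):
        # sha256 the input, then unpadded url-safe base64 (mirrors the
        # sha256_and_url_safe_base64 helper added in synapse/util/hash.py).
        digest = hashlib.sha256(text.encode()).digest()
        return base64.urlsafe_b64encode(digest).decode().rstrip("=")

    # Illustrative values; the pepper is returned by GET /_matrix/identity/v2/hash_details.
    address, medium, pepper = "alice@example.com", "email", "matrixrocks"

    # MSC2134: hash "<address> <medium> <pepper>" instead of sending the 3PID in the clear.
    lookup_value = sha256_url_safe_base64("%s %s %s" % (address, medium, pepper))

    # Request body and MSC2140 bearer-auth header for POST /_matrix/identity/v2/lookup.
    body = {"addresses": [lookup_value], "algorithm": "sha256", "pepper": pepper}
    headers = {"Authorization": "Bearer <id_access_token>"}
    print(body, headers)

If the identity server only advertises the "none" algorithm, the lookup value is simply the plaintext "<address> <medium>" pair, much as in the v1 API.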
--- changelog.d/5897.feature | 1 + synapse/handlers/identity.py | 56 ++++++++----- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 178 +++++++++++++++++++++++++++++++++++++--- synapse/rest/client/v1/room.py | 1 + synapse/util/hash.py | 33 ++++++++ 6 files changed, 238 insertions(+), 35 deletions(-) create mode 100644 changelog.d/5897.feature create mode 100644 synapse/util/hash.py (limited to 'synapse') diff --git a/changelog.d/5897.feature b/changelog.d/5897.feature new file mode 100644 index 0000000000..1557e559e8 --- /dev/null +++ b/changelog.d/5897.feature @@ -0,0 +1 @@ +Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus id_access_token authentication for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)). diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index f690fd04a3..512f38e5a6 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -74,25 +74,6 @@ class IdentityHandler(BaseHandler): id_access_token = creds.get("id_access_token") return client_secret, id_server, id_access_token - def create_id_access_token_header(self, id_access_token): - """Create an Authorization header for passing to SimpleHttpClient as the header value - of an HTTP request. - - Args: - id_access_token (str): An identity server access token. - - Returns: - list[str]: The ascii-encoded bearer token encased in a list. - """ - # Prefix with Bearer - bearer_token = "Bearer %s" % id_access_token - - # Encode headers to standard ascii - bearer_token.encode("ascii") - - # Return as a list as that's how SimpleHttpClient takes header values - return [bearer_token] - @defer.inlineCallbacks def threepid_from_creds(self, id_server, creds): """ @@ -178,9 +159,7 @@ class IdentityHandler(BaseHandler): bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - headers["Authorization"] = self.create_id_access_token_header( - id_access_token - ) + headers["Authorization"] = create_id_access_token_header(id_access_token) else: bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) @@ -478,3 +457,36 @@ class IdentityHandler(BaseHandler): except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + + +def create_id_access_token_header(id_access_token): + """Create an Authorization header for passing to SimpleHttpClient as the header value + of an HTTP request. + + Args: + id_access_token (str): An identity server access token. + + Returns: + list[str]: The ascii-encoded bearer token encased in a list. + """ + # Prefix with Bearer + bearer_token = "Bearer %s" % id_access_token + + # Encode headers to standard ascii + bearer_token.encode("ascii") + + # Return as a list as that's how SimpleHttpClient takes header values + return [bearer_token] + + +class LookupAlgorithm: + """ + Supported hashing algorithms when performing a 3PID lookup. + + SHA256 - Hashing an (address, medium, pepper) combo with sha256, then url-safe base64 + encoding + NONE - Not performing any hashing. 
Simply sending an (address, medium) combo in plaintext + """ + + SHA256 = "sha256" + NONE = "none" diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a509e11d69..970be3c846 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -579,8 +579,8 @@ class RoomCreationHandler(BaseHandler): room_id = yield self._generate_room_id(creator_id=user_id, is_public=is_public) + directory_handler = self.hs.get_handlers().directory_handler if room_alias: - directory_handler = self.hs.get_handlers().directory_handler yield directory_handler.create_association( requester=requester, room_id=room_id, @@ -665,6 +665,7 @@ class RoomCreationHandler(BaseHandler): for invite_3pid in invite_3pid_list: id_server = invite_3pid["id_server"] + id_access_token = invite_3pid.get("id_access_token") # optional address = invite_3pid["address"] medium = invite_3pid["medium"] yield self.hs.get_room_member_handler().do_3pid_invite( @@ -675,6 +676,7 @@ class RoomCreationHandler(BaseHandler): id_server, requester, txn_id=None, + id_access_token=id_access_token, ) result = {"room_id": room_id} diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a3a3d4d143..43d10a5308 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -29,9 +29,11 @@ from twisted.internet import defer from synapse import types from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError +from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room +from synapse.util.hash import sha256_and_url_safe_base64 from ._base import BaseHandler @@ -626,7 +628,7 @@ class RoomMemberHandler(object): servers.remove(room_alias.domain) servers.insert(0, room_alias.domain) - return (RoomID.from_string(room_id), servers) + return RoomID.from_string(room_id), servers @defer.inlineCallbacks def _get_inviter(self, user_id, room_id): @@ -638,7 +640,15 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def do_3pid_invite( - self, room_id, inviter, medium, address, id_server, requester, txn_id + self, + room_id, + inviter, + medium, + address, + id_server, + requester, + txn_id, + id_access_token=None, ): if self.config.block_non_admin_invites: is_requester_admin = yield self.auth.is_server_admin(requester.user) @@ -661,7 +671,12 @@ class RoomMemberHandler(object): Codes.FORBIDDEN, ) - invitee = yield self._lookup_3pid(id_server, medium, address) + if not self._enable_lookup: + raise SynapseError( + 403, "Looking up third-party identifiers is denied from this server" + ) + + invitee = yield self._lookup_3pid(id_server, medium, address, id_access_token) if invitee: yield self.update_membership( @@ -673,9 +688,47 @@ class RoomMemberHandler(object): ) @defer.inlineCallbacks - def _lookup_3pid(self, id_server, medium, address): + def _lookup_3pid(self, id_server, medium, address, id_access_token=None): """Looks up a 3pid in the passed identity server. + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). 
+ id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + str|None: the matrix ID of the 3pid, or None if it is not recognized. + """ + if id_access_token is not None: + try: + results = yield self._lookup_3pid_v2( + id_server, id_access_token, medium, address + ) + return results + + except Exception as e: + # Catch HttpResponseExcept for a non-200 response code + # Check if this identity server does not know about v2 lookups + if isinstance(e, HttpResponseException) and e.code == 404: + # This is an old identity server that does not yet support v2 lookups + logger.warning( + "Attempted v2 lookup on v1 identity server %s. Falling " + "back to v1", + id_server, + ) + else: + logger.warning("Error when looking up hashing details: %s", e) + return None + + return (yield self._lookup_3pid_v1(id_server, medium, address)) + + @defer.inlineCallbacks + def _lookup_3pid_v1(self, id_server, medium, address): + """Looks up a 3pid in the passed identity server using v1 lookup. + Args: id_server (str): The server name (including port, if required) of the identity server to use. @@ -685,10 +738,6 @@ class RoomMemberHandler(object): Returns: str: the matrix ID of the 3pid, or None if it is not recognized. """ - if not self._enable_lookup: - raise SynapseError( - 403, "Looking up third-party identifiers is denied from this server" - ) try: data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), @@ -702,9 +751,116 @@ class RoomMemberHandler(object): return data["mxid"] except IOError as e: - logger.warn("Error from identity server lookup: %s" % (e,)) + logger.warning("Error from v1 identity server lookup: %s" % (e,)) + + return None + + @defer.inlineCallbacks + def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): + """Looks up a 3pid in the passed identity server using v2 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + id_access_token (str): The access token to authenticate to the identity server with + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. 
+ """ + # Check what hashing details are supported by this identity server + hash_details = yield self.simple_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), + {"access_token": id_access_token}, + ) + + if not isinstance(hash_details, dict): + logger.warning( + "Got non-dict object when checking hash details of %s%s: %s", + id_server_scheme, + id_server, + hash_details, + ) + raise SynapseError( + 400, + "Non-dict object from %s%s during v2 hash_details request: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Extract information from hash_details + supported_lookup_algorithms = hash_details.get("algorithms") + lookup_pepper = hash_details.get("lookup_pepper") + if ( + not supported_lookup_algorithms + or not isinstance(supported_lookup_algorithms, list) + or not lookup_pepper + or not isinstance(lookup_pepper, str) + ): + raise SynapseError( + 400, + "Invalid hash details received from identity server %s%s: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Check if any of the supported lookup algorithms are present + if LookupAlgorithm.SHA256 in supported_lookup_algorithms: + # Perform a hashed lookup + lookup_algorithm = LookupAlgorithm.SHA256 + + # Hash address, medium and the pepper with sha256 + to_hash = "%s %s %s" % (address, medium, lookup_pepper) + lookup_value = sha256_and_url_safe_base64(to_hash) + + elif LookupAlgorithm.NONE in supported_lookup_algorithms: + # Perform a non-hashed lookup + lookup_algorithm = LookupAlgorithm.NONE + + # Combine together plaintext address and medium + lookup_value = "%s %s" % (address, medium) + + else: + logger.warning( + "None of the provided lookup algorithms of %s are supported: %s", + id_server, + supported_lookup_algorithms, + ) + raise SynapseError( + 400, + "Provided identity server does not support any v2 lookup " + "algorithms that this homeserver supports.", + ) + + # Authenticate with identity server given the access token from the client + headers = {"Authorization": create_id_access_token_header(id_access_token)} + + try: + lookup_results = yield self.simple_http_client.post_json_get_json( + "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), + { + "addresses": [lookup_value], + "algorithm": lookup_algorithm, + "pepper": lookup_pepper, + }, + headers=headers, + ) + except Exception as e: + logger.warning("Error when performing a v2 3pid lookup: %s", e) + raise SynapseError( + 500, "Unknown error occurred during identity server lookup" + ) + + # Check for a mapping from what we looked up to an MXID + if "mappings" not in lookup_results or not isinstance( + lookup_results["mappings"], dict + ): + logger.warning("No results from 3pid lookup") return None + # Return the MXID if it's available, or None otherwise + mxid = lookup_results["mappings"].get(lookup_value) + return mxid + @defer.inlineCallbacks def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: @@ -844,7 +1000,6 @@ class RoomMemberHandler(object): display_name (str): A user-friendly name to represent the invited user. 
""" - is_url = "%s%s/_matrix/identity/api/v1/store-invite" % ( id_server_scheme, id_server, @@ -862,7 +1017,6 @@ class RoomMemberHandler(object): "sender_display_name": inviter_display_name, "sender_avatar_url": inviter_avatar_url, } - try: data = yield self.simple_http_client.post_json_get_json( is_url, invite_config @@ -1049,7 +1203,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): # The 'except' clause is very broad, but we need to # capture everything from DNS failures upwards # - logger.warn("Failed to reject invite: %s", e) + logger.warning("Failed to reject invite: %s", e) yield self.store.locally_reject_invite(target.to_string(), room_id) return {} diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 3582259026..a6a7b3b57e 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -701,6 +701,7 @@ class RoomMembershipRestServlet(TransactionRestServlet): content["id_server"], requester, txn_id, + content.get("id_access_token"), ) return 200, {} diff --git a/synapse/util/hash.py b/synapse/util/hash.py new file mode 100644 index 0000000000..359168704e --- /dev/null +++ b/synapse/util/hash.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib + +import unpaddedbase64 + + +def sha256_and_url_safe_base64(input_text): + """SHA256 hash an input string, encode the digest as url-safe base64, and + return + + :param input_text: string to hash + :type input_text: str + + :returns a sha256 hashed and url-safe base64 encoded digest + :rtype: str + """ + digest = hashlib.sha256(input_text.encode()).digest() + return unpaddedbase64.encode_base64(digest, urlsafe=True) -- cgit 1.4.1 From 6d847d8ce69f2cb849633265aaeb4a9df4ff713d Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Wed, 11 Sep 2019 20:22:18 +0300 Subject: Ensure support users can be registered even if MAU limit is reached This allows support users to be created even on MAU limits via the admin API. Support users are excluded from MAU after creation, so it makes sense to exclude them in creation - except if the whole host is in disabled state. Signed-off-by: Jason Robinson --- changelog.d/6020.bugfix | 1 + synapse/api/auth.py | 11 +++++++++-- tests/api/test_auth.py | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6020.bugfix (limited to 'synapse') diff --git a/changelog.d/6020.bugfix b/changelog.d/6020.bugfix new file mode 100644 index 0000000000..58a7deba9d --- /dev/null +++ b/changelog.d/6020.bugfix @@ -0,0 +1 @@ +Ensure support users can be registered even if MAU limit is reached. 
diff --git a/synapse/api/auth.py b/synapse/api/auth.py index ddc195bc32..9e445cd808 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -25,7 +25,7 @@ from twisted.internet import defer import synapse.logging.opentracing as opentracing import synapse.types from synapse import event_auth -from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.api.constants import EventTypes, JoinRules, Membership, UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -709,7 +709,7 @@ class Auth(object): ) @defer.inlineCallbacks - def check_auth_blocking(self, user_id=None, threepid=None): + def check_auth_blocking(self, user_id=None, threepid=None, user_type=None): """Checks if the user should be rejected for some external reason, such as monthly active user limiting or global disable flag @@ -722,6 +722,9 @@ class Auth(object): with a MAU blocked server, normally they would be rejected but their threepid is on the reserved list. user_id and threepid should never be set at the same time. + + user_type(str|None): If present, is used to decide whether to check against + certain blocking reasons like MAU. """ # Never fail an auth check for the server notices users or support user @@ -759,6 +762,10 @@ class Auth(object): self.hs.config.mau_limits_reserved_threepids, threepid ): return + elif user_type == UserTypes.SUPPORT: + # If the user does not exist yet and is of type "support", + # allow registration. Support users are excluded from MAU checks. + return # Else if there is no room in the MAU bucket, bail current_mau = yield self.store.get_monthly_active_count() if current_mau >= self.hs.config.max_mau_value: diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index c0cb8ef296..6121efcfa9 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -21,6 +21,7 @@ from twisted.internet import defer import synapse.handlers.auth from synapse.api.auth import Auth +from synapse.api.constants import UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -335,6 +336,23 @@ class AuthTestCase(unittest.TestCase): ) yield self.auth.check_auth_blocking() + @defer.inlineCallbacks + def test_blocking_mau__depending_on_user_type(self): + self.hs.config.max_mau_value = 50 + self.hs.config.limit_usage_by_mau = True + + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Support users allowed + yield self.auth.check_auth_blocking(user_type=UserTypes.SUPPORT) + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Bots not allowed + with self.assertRaises(ResourceLimitError): + yield self.auth.check_auth_blocking(user_type=UserTypes.BOT) + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Real users not allowed + with self.assertRaises(ResourceLimitError): + yield self.auth.check_auth_blocking() + @defer.inlineCallbacks def test_reserved_threepid(self): self.hs.config.limit_usage_by_mau = True -- cgit 1.4.1 From a8251da10f98a251b9aa0be1f313d8d2e4ac1c3f Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 12 Sep 2019 10:57:37 +0100 Subject: Blow up config if opentracing is missing (#5985) * Blow up config if opentracing is missing --- changelog.d/5985.feature | 1 + synapse/config/tracer.py | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 changelog.d/5985.feature (limited to 'synapse') diff --git a/changelog.d/5985.feature b/changelog.d/5985.feature new file mode 100644 index 0000000000..e5e29504af --- /dev/null +++ b/changelog.d/5985.feature @@ -0,0 +1 
@@ +Check at setup that opentracing is installed if it's enabled in the config. diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 95e7ccb3a3..85d99a3166 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.python_dependencies import DependencyException, check_requirements + from ._base import Config, ConfigError @@ -32,6 +34,11 @@ class TracerConfig(Config): if not self.opentracer_enabled: return + try: + check_requirements("opentracing") + except DependencyException as e: + raise ConfigError(e.message) + # The tracer is enabled so sanitize the config self.opentracer_whitelist = opentracing_config.get("homeserver_whitelist", []) -- cgit 1.4.1 From dd2e5b0038dbe9812775e5943e5bccf550d7468a Mon Sep 17 00:00:00 2001 From: Sorunome Date: Thu, 12 Sep 2019 12:24:57 +0200 Subject: add report_stats_endpoint config option (#6012) This PR adds the optional `report_stats_endpoint` to configure where stats are reported to, if enabled. --- changelog.d/6012.feature | 1 + docs/sample_config.yaml | 5 +++++ synapse/app/homeserver.py | 6 ++++-- synapse/config/metrics.py | 9 +++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6012.feature (limited to 'synapse') diff --git a/changelog.d/6012.feature b/changelog.d/6012.feature new file mode 100644 index 0000000000..25425510c6 --- /dev/null +++ b/changelog.d/6012.feature @@ -0,0 +1 @@ +Add report_stats_endpoint option to configure where stats are reported to, if enabled. Contributed by @Sorunome. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index c970a1c679..dd4e2d5ebd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -985,6 +985,11 @@ metrics_flags: # Whether or not to report anonymized homeserver usage statistics. # report_stats: true|false +# The endpoint to report the anonymized homeserver usage statistics to. 
+# Defaults to https://matrix.org/report-usage-stats/push +# +#report_stats_endpoint: https://example.com/report-usage-stats/push + ## API Configuration ## diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 04f1ed14f3..774326dff9 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -561,10 +561,12 @@ def run(hs): stats["database_engine"] = hs.get_datastore().database_engine_name stats["database_server_version"] = hs.get_datastore().get_server_version() - logger.info("Reporting stats to matrix.org: %s" % (stats,)) + logger.info( + "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats) + ) try: yield hs.get_simple_http_client().put_json( - "https://matrix.org/report-usage-stats/push", stats + hs.config.report_stats_endpoint, stats ) except Exception as e: logger.warn("Error reporting stats: %s", e) diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 9eb1e55ddb..ec35a6b868 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -37,6 +37,9 @@ class MetricsConfig(Config): def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) + self.report_stats_endpoint = config.get( + "report_stats_endpoint", "https://matrix.org/report-usage-stats/push" + ) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") @@ -95,4 +98,10 @@ class MetricsConfig(Config): else: res += "report_stats: %s\n" % ("true" if report_stats else "false") + res += """ + # The endpoint to report the anonymized homeserver usage statistics to. + # Defaults to https://matrix.org/report-usage-stats/push + # + #report_stats_endpoint: https://example.com/report-usage-stats/push + """ return res -- cgit 1.4.1 From 642fad8bd47ffcb74d970de632a7316dfc15d26b Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 12 Sep 2019 11:42:47 +0100 Subject: Fix SSO fallback login Well, it worked, but forgot to remove the thing saying login was unavailable. --- synapse/static/client/login/js/login.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/static/client/login/js/login.js b/synapse/static/client/login/js/login.js index e02663f50e..276c271bbe 100644 --- a/synapse/static/client/login/js/login.js +++ b/synapse/static/client/login/js/login.js @@ -62,7 +62,7 @@ var show_login = function() { $("#sso_flow").show(); } - if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas) { + if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas && !matrixLogin.serverAcceptsSso) { $("#no_login_types").show(); } }; -- cgit 1.4.1 From 0388beafe48d1ae9c30565c37b8902b9aa0b8fe2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 12 Sep 2019 12:59:43 +0100 Subject: Fix bug in calculating the federation retry backoff period (#6025) This was intended to introduce an element of jitter; instead it gave you a 30/60 chance of resetting to zero. --- changelog.d/6025.bugfix | 1 + synapse/util/retryutils.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6025.bugfix (limited to 'synapse') diff --git a/changelog.d/6025.bugfix b/changelog.d/6025.bugfix new file mode 100644 index 0000000000..50d7f9aab5 --- /dev/null +++ b/changelog.d/6025.bugfix @@ -0,0 +1 @@ +Fix bug in calculating the federation retry backoff period. 
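To illustrate the bug being fixed (a standalone sketch, not part of the patch): the old code applied `int()` to the jitter factor itself, so the factor was truncated to 0 or 1 — any draw below 1.0 multiplied the whole interval by zero, and any other draw applied no jitter at all. The fix applies the jitter first and truncates the final product once.

    import random

    RETRY_MULTIPLIER = 5
    interval = 10 * 60 * 1000  # e.g. MIN_RETRY_INTERVAL, 10 minutes in ms

    # Old behaviour: int(random.uniform(0.8, 1.4)) is either 0 or 1.
    old = interval * RETRY_MULTIPLIER * int(random.uniform(0.8, 1.4))
    # -> either 0 (draw below 1.0) or exactly interval * 5 (no jitter).

    # New behaviour: jitter the product, then truncate once.
    new = int(interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4))
    # -> an integer between 0.8x and 1.4x of interval * 5.

    print(old, new)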
\ No newline at end of file diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 5b16a81617..33263fe20f 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -193,8 +193,9 @@ class RetryDestinationLimiter(object): else: # We couldn't connect. if self.retry_interval: - self.retry_interval *= RETRY_MULTIPLIER - self.retry_interval *= int(random.uniform(0.8, 1.4)) + self.retry_interval = int( + self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4) + ) if self.retry_interval >= MAX_RETRY_INTERVAL: self.retry_interval = MAX_RETRY_INTERVAL -- cgit 1.4.1 From 3d882a7ba52114f18ec6be61c51561db203a0534 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 12 Sep 2019 13:00:13 +0100 Subject: Remove the cap on federation retry interval. (#6026) Essentially the intention here is to end up blacklisting servers which never respond to federation requests. Fixes https://github.com/matrix-org/synapse/issues/5113. --- changelog.d/6026.feature | 1 + synapse/util/retryutils.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6026.feature (limited to 'synapse') diff --git a/changelog.d/6026.feature b/changelog.d/6026.feature new file mode 100644 index 0000000000..2489ff09b5 --- /dev/null +++ b/changelog.d/6026.feature @@ -0,0 +1 @@ +Stop sending federation transactions to servers which have been down for a long time. diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 33263fe20f..b740913b58 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -28,8 +28,8 @@ MIN_RETRY_INTERVAL = 10 * 60 * 1000 # how much we multiply the backoff by after each subsequent fail RETRY_MULTIPLIER = 5 -# a cap on the backoff -MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000 +# a cap on the backoff. 
(Essentially none) +MAX_RETRY_INTERVAL = 2 ** 63 class NotRetryingDestination(Exception): -- cgit 1.4.1 From b617864cd9f81109e818bc5ae95bee317d917b72 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 13 Sep 2019 02:29:55 +1000 Subject: Fix for structured logging tests stomping on logs (#6023) --- MANIFEST.in | 12 +++++---- changelog.d/6023.misc | 1 + mypy.ini | 54 ++++++++++++++++++++++++++++++++++++++++ synapse/config/logger.py | 33 ++++++++++++++++++------ synapse/logging/_structured.py | 8 +++--- synapse/logging/_terse_json.py | 8 +++--- synapse/logging/opentracing.py | 4 +-- synapse/metrics/__init__.py | 5 ++-- synapse/metrics/_exposition.py | 4 ++- synapse/python_dependencies.py | 7 +++--- tests/logging/test_structured.py | 25 ++++++++++++++++--- tests/logging/test_terse_json.py | 4 +-- tox.ini | 30 +++++++++++++++++----- 13 files changed, 154 insertions(+), 41 deletions(-) create mode 100644 changelog.d/6023.misc create mode 100644 mypy.ini (limited to 'synapse') diff --git a/MANIFEST.in b/MANIFEST.in index 919cd8a1cd..9c2902b8d2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -38,14 +38,16 @@ exclude sytest-blacklist include pyproject.toml recursive-include changelog.d * -prune .github -prune demo/etc -prune docker +prune .buildkite prune .circleci +prune .codecov.yml prune .coveragerc +prune .github prune debian -prune .codecov.yml -prune .buildkite +prune demo/etc +prune docker +prune mypy.ini +prune stubs exclude jenkins* recursive-exclude jenkins *.sh diff --git a/changelog.d/6023.misc b/changelog.d/6023.misc new file mode 100644 index 0000000000..d80410c22c --- /dev/null +++ b/changelog.d/6023.misc @@ -0,0 +1 @@ +Fix the structured logging tests stomping on the global log configuration for subsequent tests. diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000000..8788574ee3 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,54 @@ +[mypy] +namespace_packages=True +plugins=mypy_zope:plugin +follow_imports=skip +mypy_path=stubs + +[mypy-synapse.config.homeserver] +# this is a mess because of the metaclass shenanigans +ignore_errors = True + +[mypy-zope] +ignore_missing_imports = True + +[mypy-constantly] +ignore_missing_imports = True + +[mypy-twisted.*] +ignore_missing_imports = True + +[mypy-treq.*] +ignore_missing_imports = True + +[mypy-hyperlink] +ignore_missing_imports = True + +[mypy-h11] +ignore_missing_imports = True + +[mypy-opentracing] +ignore_missing_imports = True + +[mypy-OpenSSL] +ignore_missing_imports = True + +[mypy-netaddr] +ignore_missing_imports = True + +[mypy-saml2.*] +ignore_missing_imports = True + +[mypy-unpaddedbase64] +ignore_missing_imports = True + +[mypy-canonicaljson] +ignore_missing_imports = True + +[mypy-jaeger_client] +ignore_missing_imports = True + +[mypy-jsonschema] +ignore_missing_imports = True + +[mypy-signedjson.*] +ignore_missing_imports = True diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 2704c18720..767ecfdf09 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -21,7 +21,12 @@ from string import Template import yaml -from twisted.logger import STDLibLogObserver, globalLogBeginner +from twisted.logger import ( + ILogObserver, + LogBeginner, + STDLibLogObserver, + globalLogBeginner, +) import synapse from synapse.app import _base as appbase @@ -124,7 +129,7 @@ class LoggingConfig(Config): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def _setup_stdlib_logging(config, log_config): +def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner): """ Set 
up Python stdlib logging. """ @@ -165,12 +170,12 @@ def _setup_stdlib_logging(config, log_config): return observer(event) - globalLogBeginner.beginLoggingTo( - [_log], redirectStandardIO=not config.no_redirect_stdio - ) + logBeginner.beginLoggingTo([_log], redirectStandardIO=not config.no_redirect_stdio) if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") + return observer + def _reload_stdlib_logging(*args, log_config=None): logger = logging.getLogger("") @@ -181,7 +186,9 @@ def _reload_stdlib_logging(*args, log_config=None): logging.config.dictConfig(log_config) -def setup_logging(hs, config, use_worker_options=False): +def setup_logging( + hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner +) -> ILogObserver: """ Set up the logging subsystem. @@ -191,6 +198,12 @@ def setup_logging(hs, config, use_worker_options=False): use_worker_options (bool): True to use the 'worker_log_config' option instead of 'log_config'. + + logBeginner: The Twisted logBeginner to use. + + Returns: + The "root" Twisted Logger observer, suitable for sending logs to from a + Logger instance. """ log_config = config.worker_log_config if use_worker_options else config.log_config @@ -210,10 +223,12 @@ def setup_logging(hs, config, use_worker_options=False): log_config_body = read_config() if log_config_body and log_config_body.get("structured") is True: - setup_structured_logging(hs, config, log_config_body) + logger = setup_structured_logging( + hs, config, log_config_body, logBeginner=logBeginner + ) appbase.register_sighup(read_config, callback=reload_structured_logging) else: - _setup_stdlib_logging(config, log_config_body) + logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) appbase.register_sighup(read_config, callback=_reload_stdlib_logging) # make sure that the first thing we log is a thing we can grep backwards @@ -221,3 +236,5 @@ def setup_logging(hs, config, use_worker_options=False): logging.warn("***** STARTING SERVER *****") logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) logging.info("Server hostname: %s", config.server_name) + + return logger diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 0367d6dfc4..3220e985a9 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -18,6 +18,7 @@ import os.path import sys import typing import warnings +from typing import List import attr from constantly import NamedConstant, Names, ValueConstant, Values @@ -33,7 +34,6 @@ from twisted.logger import ( LogLevelFilterPredicate, LogPublisher, eventAsText, - globalLogBeginner, jsonFileLogObserver, ) @@ -134,7 +134,7 @@ class PythonStdlibToTwistedLogger(logging.Handler): ) -def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver: +def SynapseFileLogObserver(outFile: typing.IO[str]) -> FileLogObserver: """ A log observer that formats events like the traditional log formatter and sends them to `outFile`. 
@@ -265,7 +265,7 @@ def setup_structured_logging( hs, config, log_config: dict, - logBeginner: LogBeginner = globalLogBeginner, + logBeginner: LogBeginner, redirect_stdlib_logging: bool = True, ) -> LogPublisher: """ @@ -286,7 +286,7 @@ def setup_structured_logging( if "drains" not in log_config: raise ConfigError("The logging configuration requires a list of drains.") - observers = [] + observers = [] # type: List[ILogObserver] for observer in parse_drain_configs(log_config["drains"]): # Pipe drains diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 7f1e8f23fe..0ebbde06f2 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -21,10 +21,11 @@ import sys from collections import deque from ipaddress import IPv4Address, IPv6Address, ip_address from math import floor -from typing.io import TextIO +from typing import IO import attr from simplejson import dumps +from zope.interface import implementer from twisted.application.internet import ClientService from twisted.internet.endpoints import ( @@ -33,7 +34,7 @@ from twisted.internet.endpoints import ( TCP6ClientEndpoint, ) from twisted.internet.protocol import Factory, Protocol -from twisted.logger import FileLogObserver, Logger +from twisted.logger import FileLogObserver, ILogObserver, Logger from twisted.python.failure import Failure @@ -129,7 +130,7 @@ def flatten_event(event: dict, metadata: dict, include_time: bool = False): return new_event -def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: +def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogObserver: """ A log observer that formats events to a flattened JSON representation. @@ -146,6 +147,7 @@ def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObs @attr.s +@implementer(ILogObserver) class TerseJSONToTCPLogObserver(object): """ An IObserver that writes JSON logs to a TCP target. 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 7246253018..308a27213b 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -223,8 +223,8 @@ try: from jaeger_client import Config as JaegerConfig from synapse.logging.scopecontextmanager import LogContextScopeManager except ImportError: - JaegerConfig = None - LogContextScopeManager = None + JaegerConfig = None # type: ignore + LogContextScopeManager = None # type: ignore logger = logging.getLogger(__name__) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index b5c9595cb9..bec3b13397 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -20,6 +20,7 @@ import os import platform import threading import time +from typing import Dict, Union import six @@ -42,9 +43,7 @@ logger = logging.getLogger(__name__) METRICS_PREFIX = "/_synapse/metrics" running_on_pypy = platform.python_implementation() == "PyPy" -all_metrics = [] -all_collectors = [] -all_gauges = {} +all_gauges = {} # type: Dict[str, Union[LaterGauge, InFlightGauge, BucketCollector]] HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index 1933ecd3e3..74d9c3ecd3 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -36,7 +36,9 @@ from twisted.web.resource import Resource try: from prometheus_client.samples import Sample except ImportError: - Sample = namedtuple("Sample", ["name", "labels", "value", "timestamp", "exemplar"]) + Sample = namedtuple( + "Sample", ["name", "labels", "value", "timestamp", "exemplar"] + ) # type: ignore CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8") diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 07345e916a..0bd563edc7 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +from typing import Set from pkg_resources import ( DistributionNotFound, @@ -97,7 +98,7 @@ CONDITIONAL_REQUIREMENTS = { "jwt": ["pyjwt>=1.6.4"], } -ALL_OPTIONAL_REQUIREMENTS = set() +ALL_OPTIONAL_REQUIREMENTS = set() # type: Set[str] for name, optional_deps in CONDITIONAL_REQUIREMENTS.items(): # Exclude systemd as it's a system-based requirement. @@ -174,8 +175,8 @@ def check_requirements(for_feature=None): pass if deps_needed: - for e in errors: - logging.error(e) + for err in errors: + logging.error(err) raise DependencyException(deps_needed) diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index a786de0233..451d05c0f0 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import os.path import shutil @@ -33,7 +34,20 @@ class FakeBeginner(object): self.observers = observers -class StructuredLoggingTestCase(HomeserverTestCase): +class StructuredLoggingTestBase(object): + """ + Test base that registers a cleanup handler to reset the stdlib log handler + to 'unset'. + """ + + def prepare(self, reactor, clock, hs): + def _cleanup(): + logging.getLogger("synapse").setLevel(logging.NOTSET) + + self.addCleanup(_cleanup) + + +class StructuredLoggingTestCase(StructuredLoggingTestBase, HomeserverTestCase): """ Tests for Synapse's structured logging support. 
""" @@ -139,7 +153,9 @@ class StructuredLoggingTestCase(HomeserverTestCase): self.assertEqual(logs[0]["request"], "somereq") -class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): +class StructuredLoggingConfigurationFileTestCase( + StructuredLoggingTestBase, HomeserverTestCase +): def make_homeserver(self, reactor, clock): tempdir = self.mktemp() @@ -179,10 +195,11 @@ class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): """ When a structured logging config is given, Synapse will use it. """ - setup_logging(self.hs, self.hs.config) + beginner = FakeBeginner() + publisher = setup_logging(self.hs, self.hs.config, logBeginner=beginner) # Make a logger and send an event - logger = Logger(namespace="tests.logging.test_structured") + logger = Logger(namespace="tests.logging.test_structured", observer=publisher) with LoggingContext("testcontext", request="somereq"): logger.info("Hello there, {name}!", name="steve") diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 514282591d..4cf81f7128 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -23,10 +23,10 @@ from synapse.logging._structured import setup_structured_logging from tests.server import connect_client from tests.unittest import HomeserverTestCase -from .test_structured import FakeBeginner +from .test_structured import FakeBeginner, StructuredLoggingTestBase -class TerseJSONTCPTestCase(HomeserverTestCase): +class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase): def test_log_output(self): """ The Terse JSON outputter delivers simplified structured logs over TCP. diff --git a/tox.ini b/tox.ini index 7cb40847b5..1bce10a4ce 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,7 @@ envlist = packaging, py35, py36, py37, check_codestyle, check_isort [base] +basepython = python3.7 deps = mock python-subunit @@ -137,18 +138,35 @@ commands = {toxinidir}/scripts-dev/generate_sample_config --check skip_install = True deps = coverage -whitelist_externals = - bash commands= coverage combine coverage report +[testenv:cov-erase] +skip_install = True +deps = + coverage +commands= + coverage erase + +[testenv:cov-html] +skip_install = True +deps = + coverage +commands= + coverage html + [testenv:mypy] -basepython = python3.5 +basepython = python3.7 +skip_install = True deps = {[base]deps} mypy + mypy-zope + typeshed +env = + MYPYPATH = stubs/ extras = all -commands = mypy --ignore-missing-imports \ - synapse/logging/_structured.py \ - synapse/logging/_terse_json.py +commands = mypy --show-traceback \ + synapse/logging/ \ + synapse/config/ -- cgit 1.4.1 From 785cbd3999ab011440b453e07992d3b0c92a4059 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 12:07:03 +0100 Subject: Make the sample saml config closer to our standards It' still not great, thanks to the nested dictionaries, but it's better. --- docs/sample_config.yaml | 110 ++++++++++++++++++++------------------- synapse/config/saml2_config.py | 113 ++++++++++++++++++++++------------------- 2 files changed, 121 insertions(+), 102 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 0c6be30e51..8cfc5c312a 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1031,12 +1031,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # Enable SAML2 for registration and login. Uses pysaml2. # -# `sp_config` is the configuration for the pysaml2 Service Provider. -# See pysaml2 docs for format of config. 
+# At least one of `sp_config` or `config_path` must be set in this section to +# enable SAML login. # -# Default values will be used for the 'entityid' and 'service' settings, -# so it is not normally necessary to specify them unless you need to -# override them. +# (You will probably also want to set the following options to `false` to +# disable the regular login/registration flows: +# * enable_registration +# * password_config.enabled # # Once SAML support is enabled, a metadata file will be exposed at # https://:/_matrix/saml2/metadata.xml, which you may be able to @@ -1044,52 +1045,59 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # the IdP to use an ACS location of # https://:/_matrix/saml2/authn_response. # -#saml2_config: -# sp_config: -# # point this to the IdP's metadata. You can use either a local file or -# # (preferably) a URL. -# metadata: -# #local: ["saml2/idp.xml"] -# remote: -# - url: https://our_idp/metadata.xml -# -# # By default, the user has to go to our login page first. If you'd like to -# # allow IdP-initiated login, set 'allow_unsolicited: True' in a -# # 'service.sp' section: -# # -# #service: -# # sp: -# # allow_unsolicited: True -# -# # The examples below are just used to generate our metadata xml, and you -# # may well not need it, depending on your setup. Alternatively you -# # may need a whole lot more detail - see the pysaml2 docs! -# -# description: ["My awesome SP", "en"] -# name: ["Test SP", "en"] -# -# organization: -# name: Example com -# display_name: -# - ["Example co", "en"] -# url: "http://example.com" -# -# contact_person: -# - given_name: Bob -# sur_name: "the Sysadmin" -# email_address": ["admin@example.com"] -# contact_type": technical -# -# # Instead of putting the config inline as above, you can specify a -# # separate pysaml2 configuration file: -# # -# config_path: "CONFDIR/sp_conf.py" -# -# # the lifetime of a SAML session. This defines how long a user has to -# # complete the authentication process, if allow_unsolicited is unset. -# # The default is 5 minutes. -# # -# # saml_session_lifetime: 5m +saml2_config: + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. + # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # By default, the user has to go to our login page first. If you'd like + # # to allow IdP-initiated login, set 'allow_unsolicited: True' in a + # # 'service.sp' section: + # # + # #service: + # # sp: + # # allow_unsolicited: true + # + # # The examples below are just used to generate our metadata xml, and you + # # may well not need them, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! 
+ # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical + + # Instead of putting the config inline as above, you can specify a + # separate pysaml2 configuration file: + # + #config_path: "CONFDIR/sp_conf.py" + + # the lifetime of a SAML session. This defines how long a user has to + # complete the authentication process, if allow_unsolicited is unset. + # The default is 5 minutes. + # + #saml_session_lifetime: 5m diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index 6a8161547a..c46ac087db 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -26,6 +26,9 @@ class SAML2Config(Config): if not saml2_config or not saml2_config.get("enabled", True): return + if not saml2_config.get("sp_config") and not saml2_config.get("config_path"): + return + try: check_requirements("saml2") except DependencyException as e: @@ -76,12 +79,13 @@ class SAML2Config(Config): return """\ # Enable SAML2 for registration and login. Uses pysaml2. # - # `sp_config` is the configuration for the pysaml2 Service Provider. - # See pysaml2 docs for format of config. + # At least one of `sp_config` or `config_path` must be set in this section to + # enable SAML login. # - # Default values will be used for the 'entityid' and 'service' settings, - # so it is not normally necessary to specify them unless you need to - # override them. + # (You will probably also want to set the following options to `false` to + # disable the regular login/registration flows: + # * enable_registration + # * password_config.enabled # # Once SAML support is enabled, a metadata file will be exposed at # https://:/_matrix/saml2/metadata.xml, which you may be able to @@ -89,52 +93,59 @@ class SAML2Config(Config): # the IdP to use an ACS location of # https://:/_matrix/saml2/authn_response. # - #saml2_config: - # sp_config: - # # point this to the IdP's metadata. You can use either a local file or - # # (preferably) a URL. - # metadata: - # #local: ["saml2/idp.xml"] - # remote: - # - url: https://our_idp/metadata.xml - # - # # By default, the user has to go to our login page first. If you'd like to - # # allow IdP-initiated login, set 'allow_unsolicited: True' in a - # # 'service.sp' section: - # # - # #service: - # # sp: - # # allow_unsolicited: True - # - # # The examples below are just used to generate our metadata xml, and you - # # may well not need it, depending on your setup. Alternatively you - # # may need a whole lot more detail - see the pysaml2 docs! - # - # description: ["My awesome SP", "en"] - # name: ["Test SP", "en"] - # - # organization: - # name: Example com - # display_name: - # - ["Example co", "en"] - # url: "http://example.com" - # - # contact_person: - # - given_name: Bob - # sur_name: "the Sysadmin" - # email_address": ["admin@example.com"] - # contact_type": technical - # - # # Instead of putting the config inline as above, you can specify a - # # separate pysaml2 configuration file: - # # - # config_path: "%(config_dir_path)s/sp_conf.py" - # - # # the lifetime of a SAML session. This defines how long a user has to - # # complete the authentication process, if allow_unsolicited is unset. - # # The default is 5 minutes. 
- # # - # # saml_session_lifetime: 5m + saml2_config: + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. + # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # By default, the user has to go to our login page first. If you'd like + # # to allow IdP-initiated login, set 'allow_unsolicited: True' in a + # # 'service.sp' section: + # # + # #service: + # # sp: + # # allow_unsolicited: true + # + # # The examples below are just used to generate our metadata xml, and you + # # may well not need them, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! + # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical + + # Instead of putting the config inline as above, you can specify a + # separate pysaml2 configuration file: + # + #config_path: "%(config_dir_path)s/sp_conf.py" + + # the lifetime of a SAML session. This defines how long a user has to + # complete the authentication process, if allow_unsolicited is unset. + # The default is 5 minutes. + # + #saml_session_lifetime: 5m """ % { "config_dir_path": config_dir_path } -- cgit 1.4.1 From a8ac40445c98b9e1fc2538d7d4ec49c80b0298ac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 15:20:49 +0100 Subject: Record mappings from saml users in an external table We want to assign unique mxids to saml users based on an incrementing suffix. For that to work, we need to record the allocated mxid in a separate table. --- docs/sample_config.yaml | 26 ++++++ synapse/config/saml2_config.py | 78 +++++++++++++++- synapse/handlers/saml_handler.py | 103 +++++++++++++++++++-- synapse/rest/client/v1/login.py | 14 +++ synapse/storage/registration.py | 41 ++++++++ .../storage/schema/delta/56/user_external_ids.sql | 24 +++++ 6 files changed, 276 insertions(+), 10 deletions(-) create mode 100644 synapse/storage/schema/delta/56/user_external_ids.sql (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 8cfc5c312a..9021fe2cb8 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1099,6 +1099,32 @@ saml2_config: # #saml_session_lifetime: 5m + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. 
+ # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn + # Enable CAS for registration and login. diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index c46ac087db..a022470702 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -12,7 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re + from synapse.python_dependencies import DependencyException, check_requirements +from synapse.types import ( + map_username_to_mxid_localpart, + mxid_localpart_allowed_characters, +) from ._base import Config, ConfigError @@ -36,6 +42,14 @@ class SAML2Config(Config): self.saml2_enabled = True + self.saml2_mxid_source_attribute = saml2_config.get( + "mxid_source_attribute", "uid" + ) + + self.saml2_grandfathered_mxid_source_attribute = saml2_config.get( + "grandfathered_mxid_source_attribute", "uid" + ) + import saml2.config self.saml2_sp_config = saml2.config.SPConfig() @@ -51,6 +65,12 @@ class SAML2Config(Config): saml2_config.get("saml_session_lifetime", "5m") ) + mapping = saml2_config.get("mxid_mapping", "hexencode") + try: + self.saml2_mxid_mapper = MXID_MAPPER_MAP[mapping] + except KeyError: + raise ConfigError("%s is not a known mxid_mapping" % (mapping,)) + def _default_saml_config_dict(self): import saml2 @@ -58,6 +78,13 @@ class SAML2Config(Config): if public_baseurl is None: raise ConfigError("saml2_config requires a public_baseurl to be set") + required_attributes = {"uid", self.saml2_mxid_source_attribute} + + optional_attributes = {"displayName"} + if self.saml2_grandfathered_mxid_source_attribute: + optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute) + optional_attributes -= required_attributes + metadata_url = public_baseurl + "_matrix/saml2/metadata.xml" response_url = public_baseurl + "_matrix/saml2/authn_response" return { @@ -69,8 +96,9 @@ class SAML2Config(Config): (response_url, saml2.BINDING_HTTP_POST) ] }, - "required_attributes": ["uid"], - "optional_attributes": ["mail", "surname", "givenname"], + "required_attributes": list(required_attributes), + "optional_attributes": list(optional_attributes), + # "name_id_format": saml2.saml.NAMEID_FORMAT_PERSISTENT, } }, } @@ -146,6 +174,52 @@ class SAML2Config(Config): # The default is 5 minutes. # #saml_session_lifetime: 5m + + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. 
Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn """ % { "config_dir_path": config_dir_path } + + +DOT_REPLACE_PATTERN = re.compile( + ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),)) +) + + +def dot_replace_for_mxid(username: str) -> str: + username = username.lower() + username = DOT_REPLACE_PATTERN.sub(".", username) + + # regular mxids aren't allowed to start with an underscore either + username = re.sub("^_", "", username) + return username + + +MXID_MAPPER_MAP = { + "hexencode": map_username_to_mxid_localpart, + "dotreplace": dot_replace_for_mxid, +} diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index a1ce6929cf..5fa8272dc9 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -21,6 +21,8 @@ from saml2.client import Saml2Client from synapse.api.errors import SynapseError from synapse.http.servlet import parse_string from synapse.rest.client.v1.login import SSOAuthHandler +from synapse.types import UserID, map_username_to_mxid_localpart +from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -29,12 +31,26 @@ class SamlHandler: def __init__(self, hs): self._saml_client = Saml2Client(hs.config.saml2_sp_config) self._sso_auth_handler = SSOAuthHandler(hs) + self._registration_handler = hs.get_registration_handler() + + self._clock = hs.get_clock() + self._datastore = hs.get_datastore() + self._hostname = hs.hostname + self._saml2_session_lifetime = hs.config.saml2_session_lifetime + self._mxid_source_attribute = hs.config.saml2_mxid_source_attribute + self._grandfathered_mxid_source_attribute = ( + hs.config.saml2_grandfathered_mxid_source_attribute + ) + self._mxid_mapper = hs.config.saml2_mxid_mapper + + # identifier for the external_ids table + self._auth_provider_id = "saml" # a map from saml session id to Saml2SessionData object self._outstanding_requests_dict = {} - self._clock = hs.get_clock() - self._saml2_session_lifetime = hs.config.saml2_session_lifetime + # a lock on the mappings + self._mapping_lock = Linearizer(name="saml_mapping", clock=self._clock) def handle_redirect_request(self, client_redirect_url): """Handle an incoming request to /login/sso/redirect @@ -60,7 +76,7 @@ class SamlHandler: # this shouldn't happen! raise Exception("prepare_for_authenticate didn't return a Location header") - def handle_saml_response(self, request): + async def handle_saml_response(self, request): """Handle an incoming request to /_matrix/saml2/authn_response Args: @@ -77,6 +93,10 @@ class SamlHandler: # the dict. 
self.expire_sessions() + user_id = await self._map_saml_response_to_user(resp_bytes) + self._sso_auth_handler.complete_sso_login(user_id, request, relay_state) + + async def _map_saml_response_to_user(self, resp_bytes): try: saml2_auth = self._saml_client.parse_authn_request_response( resp_bytes, @@ -91,18 +111,85 @@ class SamlHandler: logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - if "uid" not in saml2_auth.ava: + try: + remote_user_id = saml2_auth.ava["uid"][0] + except KeyError: logger.warning("SAML2 response lacks a 'uid' attestation") raise SynapseError(400, "uid not in SAML2 response") + try: + mxid_source = saml2_auth.ava[self._mxid_source_attribute][0] + except KeyError: + logger.warning( + "SAML2 response lacks a '%s' attestation", self._mxid_source_attribute + ) + raise SynapseError( + 400, "%s not in SAML2 response" % (self._mxid_source_attribute,) + ) + self._outstanding_requests_dict.pop(saml2_auth.in_response_to, None) - username = saml2_auth.ava["uid"][0] displayName = saml2_auth.ava.get("displayName", [None])[0] - return self._sso_auth_handler.on_successful_auth( - username, request, relay_state, user_display_name=displayName - ) + with (await self._mapping_lock.queue(self._auth_provider_id)): + # first of all, check if we already have a mapping for this user + logger.info( + "Looking for existing mapping for user %s:%s", + self._auth_provider_id, + remote_user_id, + ) + registered_user_id = await self._datastore.get_user_by_external_id( + self._auth_provider_id, remote_user_id + ) + if registered_user_id is not None: + logger.info("Found existing mapping %s", registered_user_id) + return registered_user_id + + # backwards-compatibility hack: see if there is an existing user with a + # suitable mapping from the uid + if ( + self._grandfathered_mxid_source_attribute + and self._grandfathered_mxid_source_attribute in saml2_auth.ava + ): + attrval = saml2_auth.ava[self._grandfathered_mxid_source_attribute][0] + user_id = UserID( + map_username_to_mxid_localpart(attrval), self._hostname + ).to_string() + logger.info( + "Looking for existing account based on mapped %s %s", + self._grandfathered_mxid_source_attribute, + user_id, + ) + + users = await self._datastore.get_users_by_id_case_insensitive(user_id) + if users: + registered_user_id = list(users.keys())[0] + logger.info("Grandfathering mapping to %s", registered_user_id) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id + + # figure out a new mxid for this user + base_mxid_localpart = self._mxid_mapper(mxid_source) + + suffix = 0 + while True: + localpart = base_mxid_localpart + (str(suffix) if suffix else "") + if not await self._datastore.get_users_by_id_case_insensitive( + UserID(localpart, self._hostname).to_string() + ): + break + suffix += 1 + logger.info("Allocating mxid for new user with localpart %s", localpart) + + registered_user_id = await self._registration_handler.register_user( + localpart=localpart, default_display_name=displayName + ) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id def expire_sessions(self): expire_before = self._clock.time_msec() - self._saml2_session_lifetime diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 5762b9fd06..eeaa72b205 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py 
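The suffix-allocation step in `_map_saml_response_to_user` above — try the mapped localpart as-is, then append 1, 2, … until an unused one is found — can be summarised with the following simplified, synchronous sketch. The real code awaits `get_users_by_id_case_insensitive()` against the database rather than consulting an in-memory set, so this is only a model of the loop's shape.

```python
def allocate_localpart(base_localpart: str, taken: set) -> str:
    """Simplified model of the suffix loop: return base_localpart if free,
    otherwise base_localpart1, base_localpart2, ... (first unused wins)."""
    suffix = 0
    while True:
        localpart = base_localpart + (str(suffix) if suffix else "")
        if localpart not in taken:
            return localpart
        suffix += 1


# If 'bob' and 'bob1' are already registered, the next SAML user whose
# attribute maps to 'bob' ends up as 'bob2'.
print(allocate_localpart("bob", {"bob", "bob1"}))  # bob2
```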
@@ -29,6 +29,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.client.v2_alpha._base import client_patterns from synapse.rest.well_known import WellKnownBuilder from synapse.types import UserID, map_username_to_mxid_localpart @@ -507,6 +508,19 @@ class SSOAuthHandler(object): localpart=localpart, default_display_name=user_display_name ) + self.complete_sso_login(registered_user_id, request, client_redirect_url) + + def complete_sso_login( + self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str + ): + """Having figured out a mxid for this user, complete the HTTP request + + Args: + registered_user_id: + request: + client_redirect_url: + """ + login_token = self._macaroon_gen.generate_short_term_login_token( registered_user_id ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 55e4e84d71..1e3c2148f6 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -22,6 +22,7 @@ from six import iterkeys from six.moves import range from twisted.internet import defer +from twisted.internet.defer import Deferred from synapse.api.constants import UserTypes from synapse.api.errors import Codes, StoreError, ThreepidValidationError @@ -337,6 +338,26 @@ class RegistrationWorkerStore(SQLBaseStore): return self.runInteraction("get_users_by_id_case_insensitive", f) + async def get_user_by_external_id( + self, auth_provider: str, external_id: str + ) -> str: + """Look up a user by their external auth id + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + + Returns: + str|None: the mxid of the user, or None if they are not known + """ + return await self._simple_select_one_onecol( + table="user_external_ids", + keyvalues={"auth_provider": auth_provider, "external_id": external_id}, + retcol="user_id", + allow_none=True, + desc="get_user_by_external_id", + ) + @defer.inlineCallbacks def count_all_users(self): """Counts all users registered on the homeserver.""" @@ -848,6 +869,26 @@ class RegistrationStore( self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) + def record_user_external_id( + self, auth_provider: str, external_id: str, user_id: str + ) -> Deferred: + """Record a mapping from an external user id to a mxid + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + user_id: complete mxid that it is mapped to + """ + return self._simple_insert( + table="user_external_ids", + values={ + "auth_provider": auth_provider, + "external_id": external_id, + "user_id": user_id, + }, + desc="record_user_external_id", + ) + def user_set_password_hash(self, user_id, password_hash): """ NB. This does *not* evict any cache because the one use for this diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/schema/delta/56/user_external_ids.sql new file mode 100644 index 0000000000..91390c4527 --- /dev/null +++ b/synapse/storage/schema/delta/56/user_external_ids.sql @@ -0,0 +1,24 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * a table which records mappings from external auth providers to mxids + */ +CREATE TABLE IF NOT EXISTS user_external_ids ( + auth_provider TEXT NOT NULL, + external_id TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (auth_provider, external_id) +); -- cgit 1.4.1 From 850dcfd2d3a1d689042fb38c8a16b652244068c2 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 14 Sep 2019 04:58:38 +1000 Subject: Fix well-known lookups with the federation certificate whitelist (#5997) --- changelog.d/5996.bugfix | 1 + synapse/config/tls.py | 9 ++++- synapse/crypto/context_factory.py | 26 +++++++------- synapse/http/federation/matrix_federation_agent.py | 2 +- tests/config/test_tls.py | 40 ++++++++++++++++++++++ 5 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 changelog.d/5996.bugfix (limited to 'synapse') diff --git a/changelog.d/5996.bugfix b/changelog.d/5996.bugfix new file mode 100644 index 0000000000..05e31faaa2 --- /dev/null +++ b/changelog.d/5996.bugfix @@ -0,0 +1 @@ +federation_certificate_verification_whitelist now will not cause TypeErrors to be raised (a regression in 1.3). Additionally, it now supports internationalised domain names in their non-canonical representation. diff --git a/synapse/config/tls.py b/synapse/config/tls.py index c0148aa95c..fc47ba3e9a 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -110,8 +110,15 @@ class TlsConfig(Config): # Support globs (*) in whitelist values self.federation_certificate_verification_whitelist = [] for entry in fed_whitelist_entries: + try: + entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii")) + except UnicodeEncodeError: + raise ConfigError( + "IDNA domain names are not allowed in the " + "federation_certificate_verification_whitelist: %s" % (entry,) + ) + # Convert globs to regex - entry_regex = glob_to_regex(entry) self.federation_certificate_verification_whitelist.append(entry_regex) # List of custom certificate authorities for federation traffic validation diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 06e63a96b5..e93f0b3705 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -15,7 +15,6 @@ import logging -import idna from service_identity import VerificationError from service_identity.pyopenssl import verify_hostname, verify_ip_address from zope.interface import implementer @@ -114,14 +113,20 @@ class ClientTLSOptionsFactory(object): self._no_verify_ssl_context = self._no_verify_ssl.getContext() self._no_verify_ssl_context.set_info_callback(self._context_info_cb) - def get_options(self, host): + def get_options(self, host: bytes): + + # IPolicyForHTTPS.get_options takes bytes, but we want to compare + # against the str whitelist. The hostnames in the whitelist are already + # IDNA-encoded like the hosts will be here. 
+ ascii_host = host.decode("ascii") + # Check if certificate verification has been enabled should_verify = self._config.federation_verify_certificates # Check if we've disabled certificate verification for this host if should_verify: for regex in self._config.federation_certificate_verification_whitelist: - if regex.match(host): + if regex.match(ascii_host): should_verify = False break @@ -162,7 +167,7 @@ class SSLClientConnectionCreator(object): Replaces twisted.internet.ssl.ClientTLSOptions """ - def __init__(self, hostname, ctx, verify_certs): + def __init__(self, hostname: bytes, ctx, verify_certs: bool): self._ctx = ctx self._verifier = ConnectionVerifier(hostname, verify_certs) @@ -190,21 +195,16 @@ class ConnectionVerifier(object): # This code is based on twisted.internet.ssl.ClientTLSOptions. - def __init__(self, hostname, verify_certs): + def __init__(self, hostname: bytes, verify_certs): self._verify_certs = verify_certs - if isIPAddress(hostname) or isIPv6Address(hostname): - self._hostnameBytes = hostname.encode("ascii") + _decoded = hostname.decode("ascii") + if isIPAddress(_decoded) or isIPv6Address(_decoded): self._is_ip_address = True else: - # twisted's ClientTLSOptions falls back to the stdlib impl here if - # idna is not installed, but points out that lacks support for - # IDNA2008 (http://bugs.python.org/issue17305). - # - # We can rely on having idna. - self._hostnameBytes = idna.encode(hostname) self._is_ip_address = False + self._hostnameBytes = hostname self._hostnameASCII = self._hostnameBytes.decode("ascii") def verify_context_info_cb(self, ssl_connection, where): diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index feae7de5be..647d26dc56 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -217,7 +217,7 @@ class MatrixHostnameEndpoint(object): self._tls_options = None else: self._tls_options = tls_client_options_factory.get_options( - self._parsed_uri.host.decode("ascii") + self._parsed_uri.host ) self._srv_resolver = srv_resolver diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index 8e0c4b9533..b02780772a 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -16,6 +16,7 @@ import os +import idna import yaml from OpenSSL import SSL @@ -235,3 +236,42 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= ) self.assertTrue(conf.acme_enabled) + + def test_whitelist_idna_failure(self): + """ + The federation certificate whitelist will not allow IDNA domain names. + """ + config = { + "federation_certificate_verification_whitelist": [ + "example.com", + "*.ドメイン.テスト", + ] + } + t = TestConfig() + e = self.assertRaises( + ConfigError, t.read_config, config, config_dir_path="", data_dir_path="" + ) + self.assertIn("IDNA domain names", str(e)) + + def test_whitelist_idna_result(self): + """ + The federation certificate whitelist will match on IDNA encoded names. 
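To make the whitelist behaviour above concrete: entries must be written in their IDNA-encoded (punycode) form, and `get_options` now compares the already-encoded hostname against them. The standalone sketch below uses `fnmatch.translate` merely as a stand-in for Synapse's `glob_to_regex` helper (an assumption made for illustration, not the real call), together with the third-party `idna` package that the new test imports.

```python
import fnmatch
import re

import idna  # same library the new test above uses

# Whitelist entries must already be punycode; the config code raises a
# ConfigError for raw unicode domain names.
whitelist_entry = "*.xn--eckwd4c7c.xn--zckzah"  # i.e. *.ドメイン.テスト

# Stand-in for Synapse's glob_to_regex(): turn the glob into a regex.
pattern = re.compile(fnmatch.translate(whitelist_entry))

# get_options() receives the hostname as IDNA-encoded bytes and decodes it
# to an ASCII str before checking the whitelist.
ascii_host = idna.encode("テスト.ドメイン.テスト").decode("ascii")

print(ascii_host)                              # xn--zckzah.xn--eckwd4c7c.xn--zckzah
print(bool(pattern.match(ascii_host)))         # True  -> cert verification skipped
print(bool(pattern.match("notexample.com")))   # False -> cert verification applies
```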
+ """ + config = { + "federation_certificate_verification_whitelist": [ + "example.com", + "*.xn--eckwd4c7c.xn--zckzah", + ] + } + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + cf = ClientTLSOptionsFactory(t) + + # Not in the whitelist + opts = cf.get_options(b"notexample.com") + self.assertTrue(opts._verifier._verify_certs) + + # Caught by the wildcard + opts = cf.get_options(idna.encode("テスト.ドメイン.テスト")) + self.assertFalse(opts._verifier._verify_certs) -- cgit 1.4.1 From 1e19ce00bff8d67168d39201cdf9424f7b2f22f6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 17 Sep 2019 11:41:54 +0100 Subject: Add 'failure_ts' column to 'destinations' table (#6016) Track the time that a server started failing at, for general analysis purposes. --- changelog.d/6016.misc | 1 + .../schema/delta/56/destinations_failure_ts.sql | 25 ++++ synapse/storage/transactions.py | 23 ++-- synapse/util/retryutils.py | 16 ++- tests/handlers/test_typing.py | 7 +- tests/storage/test_transactions.py | 8 +- tests/util/test_retryutils.py | 127 +++++++++++++++++++++ 7 files changed, 195 insertions(+), 12 deletions(-) create mode 100644 changelog.d/6016.misc create mode 100644 synapse/storage/schema/delta/56/destinations_failure_ts.sql create mode 100644 tests/util/test_retryutils.py (limited to 'synapse') diff --git a/changelog.d/6016.misc b/changelog.d/6016.misc new file mode 100644 index 0000000000..91cf164714 --- /dev/null +++ b/changelog.d/6016.misc @@ -0,0 +1 @@ +Add a 'failure_ts' column to the 'destinations' database table. diff --git a/synapse/storage/schema/delta/56/destinations_failure_ts.sql b/synapse/storage/schema/delta/56/destinations_failure_ts.sql new file mode 100644 index 0000000000..f00889290b --- /dev/null +++ b/synapse/storage/schema/delta/56/destinations_failure_ts.sql @@ -0,0 +1,25 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Record the timestamp when a given server started failing + */ +ALTER TABLE destinations ADD failure_ts BIGINT; + +/* as a rough approximation, we assume that the server started failing at + * retry_interval before the last retry + */ +UPDATE destinations SET failure_ts = retry_last_ts - retry_interval + WHERE retry_last_ts > 0; diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index d81ace0ece..289c117396 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -165,7 +165,7 @@ class TransactionStore(SQLBaseStore): txn, table="destinations", keyvalues={"destination": destination}, - retcols=("destination", "retry_last_ts", "retry_interval"), + retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"), allow_none=True, ) @@ -174,12 +174,15 @@ class TransactionStore(SQLBaseStore): else: return None - def set_destination_retry_timings(self, destination, retry_last_ts, retry_interval): + def set_destination_retry_timings( + self, destination, failure_ts, retry_last_ts, retry_interval + ): """Sets the current retry timings for a given destination. Both timings should be zero if retrying is no longer occuring. Args: destination (str) + failure_ts (int|None) - when the server started failing (ms since epoch) retry_last_ts (int) - time of last retry attempt in unix epoch ms retry_interval (int) - how long until next retry in ms """ @@ -189,12 +192,13 @@ class TransactionStore(SQLBaseStore): "set_destination_retry_timings", self._set_destination_retry_timings, destination, + failure_ts, retry_last_ts, retry_interval, ) def _set_destination_retry_timings( - self, txn, destination, retry_last_ts, retry_interval + self, txn, destination, failure_ts, retry_last_ts, retry_interval ): if self.database_engine.can_native_upsert: @@ -202,9 +206,12 @@ class TransactionStore(SQLBaseStore): # resetting it) or greater than the existing retry interval. sql = """ - INSERT INTO destinations (destination, retry_last_ts, retry_interval) - VALUES (?, ?, ?) + INSERT INTO destinations ( + destination, failure_ts, retry_last_ts, retry_interval + ) + VALUES (?, ?, ?, ?) 
ON CONFLICT (destination) DO UPDATE SET + failure_ts = EXCLUDED.failure_ts, retry_last_ts = EXCLUDED.retry_last_ts, retry_interval = EXCLUDED.retry_interval WHERE @@ -212,7 +219,7 @@ class TransactionStore(SQLBaseStore): OR destinations.retry_interval < EXCLUDED.retry_interval """ - txn.execute(sql, (destination, retry_last_ts, retry_interval)) + txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval)) return @@ -225,7 +232,7 @@ class TransactionStore(SQLBaseStore): txn, table="destinations", keyvalues={"destination": destination}, - retcols=("retry_last_ts", "retry_interval"), + retcols=("failure_ts", "retry_last_ts", "retry_interval"), allow_none=True, ) @@ -235,6 +242,7 @@ class TransactionStore(SQLBaseStore): table="destinations", values={ "destination": destination, + "failure_ts": failure_ts, "retry_last_ts": retry_last_ts, "retry_interval": retry_interval, }, @@ -245,6 +253,7 @@ class TransactionStore(SQLBaseStore): "destinations", keyvalues={"destination": destination}, updatevalues={ + "failure_ts": failure_ts, "retry_last_ts": retry_last_ts, "retry_interval": retry_interval, }, diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index b740913b58..a5f2fbef5c 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -80,11 +80,13 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs) # We aren't ready to retry that destination. raise """ + failure_ts = None retry_last_ts, retry_interval = (0, 0) retry_timings = yield store.get_destination_retry_timings(destination) if retry_timings: + failure_ts = retry_timings["failure_ts"] retry_last_ts, retry_interval = ( retry_timings["retry_last_ts"], retry_timings["retry_interval"], @@ -108,6 +110,7 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs) destination, clock, store, + failure_ts, retry_interval, backoff_on_failure=backoff_on_failure, **kwargs @@ -120,6 +123,7 @@ class RetryDestinationLimiter(object): destination, clock, store, + failure_ts, retry_interval, backoff_on_404=False, backoff_on_failure=True, @@ -133,6 +137,8 @@ class RetryDestinationLimiter(object): destination (str) clock (Clock) store (DataStore) + failure_ts (int|None): when this destination started failing (in ms since + the epoch), or zero if the last request was successful retry_interval (int): The next retry interval taken from the database in milliseconds, or zero if the last request was successful. 
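The retryutils changes above and below wire `failure_ts` through the retry limiter: it is set the first time a destination fails, left untouched on subsequent failures (while `retry_interval` keeps growing), and cleared again on success. The toy, in-memory model below summarises that bookkeeping; the constants are illustrative only, and the real limiter's interval growth also involves some randomisation (see the bounds asserted in the new tests further down).

```python
MIN_RETRY_INTERVAL = 10 * 60 * 1000  # illustrative values; the real constants
RETRY_MULTIPLIER = 5                 # live in synapse.util.retryutils


class DestinationBackoff:
    """Toy model: failure_ts records when the destination *started* failing
    and only resets on success; retry_interval grows on every failure."""

    def __init__(self):
        self.failure_ts = None
        self.retry_last_ts = 0
        self.retry_interval = 0

    def on_failure(self, now_ms: int) -> None:
        self.retry_last_ts = now_ms
        if self.failure_ts is None:
            self.failure_ts = now_ms  # remember the *first* failure only
        self.retry_interval = (
            self.retry_interval * RETRY_MULTIPLIER
            if self.retry_interval
            else MIN_RETRY_INTERVAL
        )

    def on_success(self) -> None:
        self.failure_ts = None
        self.retry_last_ts = 0
        self.retry_interval = 0


b = DestinationBackoff()
b.on_failure(1_000)
b.on_failure(1_000 + 2 * MIN_RETRY_INTERVAL)
assert b.failure_ts == 1_000  # still the first failure time
assert b.retry_interval == MIN_RETRY_INTERVAL * RETRY_MULTIPLIER
b.on_success()
assert b.failure_ts is None and b.retry_interval == 0
```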
@@ -145,6 +151,7 @@ class RetryDestinationLimiter(object): self.store = store self.destination = destination + self.failure_ts = failure_ts self.retry_interval = retry_interval self.backoff_on_404 = backoff_on_404 self.backoff_on_failure = backoff_on_failure @@ -186,6 +193,7 @@ class RetryDestinationLimiter(object): logger.debug( "Connection to %s was successful; clearing backoff", self.destination ) + self.failure_ts = None retry_last_ts = 0 self.retry_interval = 0 elif not self.backoff_on_failure: @@ -211,11 +219,17 @@ class RetryDestinationLimiter(object): ) retry_last_ts = int(self.clock.time_msec()) + if self.failure_ts is None: + self.failure_ts = retry_last_ts + @defer.inlineCallbacks def store_retry_timings(): try: yield self.store.set_destination_retry_timings( - self.destination, retry_last_ts, self.retry_interval + self.destination, + self.failure_ts, + retry_last_ts, + self.retry_interval, ) except Exception: logger.exception("Failed to store destination_retry_timings") diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 5d5e324df2..1f2ef5d01f 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -99,7 +99,12 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] self.datastore = hs.get_datastore() - retry_timings_res = {"destination": "", "retry_last_ts": 0, "retry_interval": 0} + retry_timings_res = { + "destination": "", + "retry_last_ts": 0, + "retry_interval": 0, + "failure_ts": None, + } self.datastore.get_destination_retry_timings.return_value = defer.succeed( retry_timings_res ) diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 14169afa96..a771d5af29 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -29,17 +29,19 @@ class TransactionStoreTestCase(HomeserverTestCase): r = self.get_success(d) self.assertIsNone(r) - d = self.store.set_destination_retry_timings("example.com", 50, 100) + d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) self.get_success(d) d = self.store.get_destination_retry_timings("example.com") r = self.get_success(d) - self.assert_dict({"retry_last_ts": 50, "retry_interval": 100}, r) + self.assert_dict( + {"retry_last_ts": 50, "retry_interval": 100, "failure_ts": 1000}, r + ) def test_initial_set_transactions(self): """Tests that we can successfully set the destination retries (there was a bug around invalidating the cache that broke this) """ - d = self.store.set_destination_retry_timings("example.com", 50, 100) + d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) self.get_success(d) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py new file mode 100644 index 0000000000..9e348694ad --- /dev/null +++ b/tests/util/test_retryutils.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from synapse.util.retryutils import ( + MIN_RETRY_INTERVAL, + RETRY_MULTIPLIER, + NotRetryingDestination, + get_retry_limiter, +) + +from tests.unittest import HomeserverTestCase + + +class RetryLimiterTestCase(HomeserverTestCase): + def test_new_destination(self): + """A happy-path case with a new destination and a successful operation""" + store = self.hs.get_datastore() + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + # advance the clock a bit before making the request + self.pump(1) + + with limiter: + pass + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertIsNone(new_timings) + + def test_limiter(self): + """General test case which walks through the process of a failing request""" + store = self.hs.get_datastore() + + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + try: + with limiter: + self.pump(1) + failure_ts = self.clock.time_msec() + raise AssertionError("argh") + except AssertionError: + pass + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertEqual(new_timings["failure_ts"], failure_ts) + self.assertEqual(new_timings["retry_last_ts"], failure_ts) + self.assertEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL) + + # now if we try again we should get a failure + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + self.failureResultOf(d, NotRetryingDestination) + + # + # advance the clock and try again + # + + self.pump(MIN_RETRY_INTERVAL) + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + try: + with limiter: + self.pump(1) + retry_ts = self.clock.time_msec() + raise AssertionError("argh") + except AssertionError: + pass + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertEqual(new_timings["failure_ts"], failure_ts) + self.assertEqual(new_timings["retry_last_ts"], retry_ts) + self.assertGreaterEqual( + new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5 + ) + self.assertLessEqual( + new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0 + ) + + # + # one more go, with success + # + self.pump(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + with limiter: + self.pump(1) + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertIsNone(new_timings) -- cgit 1.4.1 From 70c52821ce9e755e4a5c3081510fb1260f609ee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Sep 2019 12:41:23 +0100 Subject: Fix race condition in room stats. (#6029) Broke in #5971 Basically the bug is that if get_current_state_deltas returns no new updates and we then take the max pos, its possible that we miss an update that happens in between the two calls. (e.g. 
get_current_state_deltas looks up to stream pos 5, then an event persists and so getting the max stream pos returns 6, meaning that next time we check for things with a stream pos bigger than 6) --- changelog.d/6029.bugfix | 1 + synapse/handlers/stats.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6029.bugfix (limited to 'synapse') diff --git a/changelog.d/6029.bugfix b/changelog.d/6029.bugfix new file mode 100644 index 0000000000..9ea095103b --- /dev/null +++ b/changelog.d/6029.bugfix @@ -0,0 +1 @@ +Fix room and user stats tracking. diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 3c265f3718..cbac7c347a 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -84,6 +84,13 @@ class StatsHandler(StateDeltasHandler): # Loop round handling deltas until we're up to date while True: + # Be sure to read the max stream_ordering *before* checking if there are any outstanding + # deltas, since there is otherwise a chance that we could miss updates which arrive + # after we check the deltas. + room_max_stream_ordering = yield self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: + break + deltas = yield self.store.get_current_state_deltas(self.pos) if deltas: @@ -94,7 +101,7 @@ class StatsHandler(StateDeltasHandler): else: room_deltas = {} user_deltas = {} - max_pos = yield self.store.get_room_max_stream_ordering() + max_pos = room_max_stream_ordering # Then count deltas for total_events and total_event_bytes. room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes( @@ -117,10 +124,9 @@ class StatsHandler(StateDeltasHandler): stream_id=max_pos, ) - event_processing_positions.labels("stats").set(max_pos) + logger.debug("Handled room stats to %s -> %s", self.pos, max_pos) - if self.pos == max_pos: - break + event_processing_positions.labels("stats").set(max_pos) self.pos = max_pos -- cgit 1.4.1 From 379d2a8c3918557bacdadea6b508bddd1ce20eaf Mon Sep 17 00:00:00 2001 From: dstipp Date: Tue, 17 Sep 2019 07:55:29 -0400 Subject: (#5849) Convert rst to markdown (#6040) Converting some of the rst documentation to markdown. Attempted to preserve whitespace and line breaks to minimize cosmetic change. 
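Returning to the room-stats race fixed above (#6029): the essential change is that the maximum stream ordering is read *before* fetching the deltas, so an event persisted between the two calls is picked up on the next pass instead of being skipped. Below is a simplified, self-contained sketch of the corrected loop shape; the per-room/per-user accounting is elided and the fake store exists only to make the example runnable.

```python
class FakeStore:
    """In-memory stand-in for the two datastore calls the loop uses."""

    def __init__(self, deltas):
        self._deltas = deltas  # list of {"stream_id": int, ...}

    def get_room_max_stream_ordering(self):
        return max((d["stream_id"] for d in self._deltas), default=0)

    def get_current_state_deltas(self, pos):
        return [d for d in self._deltas if d["stream_id"] > pos]


def process_stats_deltas(store, pos):
    """Simplified shape of the fixed loop: read the max stream position
    *before* fetching deltas, so an event persisted between the two calls
    is handled on the next pass rather than silently skipped."""
    while True:
        room_max_stream_ordering = store.get_room_max_stream_ordering()
        if pos == room_max_stream_ordering:
            break

        deltas = store.get_current_state_deltas(pos)
        if deltas:
            # ... handle the deltas / update per-room counters here ...
            max_pos = deltas[-1]["stream_id"]
        else:
            max_pos = room_max_stream_ordering

        pos = max_pos
    return pos


store = FakeStore([{"stream_id": 5}, {"stream_id": 6}])
print(process_stats_deltas(store, pos=4))  # -> 6
```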
--- CONTRIBUTING.rst | 2 +- INSTALL.md | 4 +- README.rst | 6 +- UPGRADE.rst | 2 +- changelog.d/5849.doc | 1 + docs/CAPTCHA_SETUP.md | 31 +++ docs/CAPTCHA_SETUP.rst | 30 --- docs/MSC1711_certificates_FAQ.md | 4 +- docs/README.md | 7 + docs/README.rst | 6 - docs/ancient_architecture_notes.md | 81 ++++++ docs/ancient_architecture_notes.rst | 59 ----- docs/application_services.md | 31 +++ docs/application_services.rst | 35 --- docs/architecture.md | 65 +++++ docs/architecture.rst | 68 ----- docs/code_style.md | 169 ++++++++++++ docs/code_style.rst | 180 ------------- docs/federate.md | 4 +- docs/log_contexts.md | 494 +++++++++++++++++++++++++++++++++++ docs/log_contexts.rst | 498 ------------------------------------ docs/media_repository.md | 30 +++ docs/media_repository.rst | 27 -- docs/metrics-howto.md | 217 ++++++++++++++++ docs/metrics-howto.rst | 285 --------------------- docs/opentracing.md | 93 +++++++ docs/opentracing.rst | 123 --------- docs/password_auth_providers.md | 116 +++++++++ docs/password_auth_providers.rst | 113 -------- docs/postgres.md | 164 ++++++++++++ docs/postgres.rst | 166 ------------ docs/replication.md | 37 +++ docs/replication.rst | 40 --- docs/reverse_proxy.md | 123 +++++++++ docs/reverse_proxy.rst | 112 -------- docs/sample_config.yaml | 12 +- docs/tcp_replication.md | 249 ++++++++++++++++++ docs/tcp_replication.rst | 249 ------------------ docs/turn-howto.md | 123 +++++++++ docs/turn-howto.rst | 127 --------- docs/workers.md | 284 ++++++++++++++++++++ docs/workers.rst | 301 ---------------------- synapse/config/server.py | 12 +- 43 files changed, 2338 insertions(+), 2442 deletions(-) create mode 100644 changelog.d/5849.doc create mode 100644 docs/CAPTCHA_SETUP.md delete mode 100644 docs/CAPTCHA_SETUP.rst create mode 100644 docs/README.md delete mode 100644 docs/README.rst create mode 100644 docs/ancient_architecture_notes.md delete mode 100644 docs/ancient_architecture_notes.rst create mode 100644 docs/application_services.md delete mode 100644 docs/application_services.rst create mode 100644 docs/architecture.md delete mode 100644 docs/architecture.rst create mode 100644 docs/code_style.md delete mode 100644 docs/code_style.rst create mode 100644 docs/log_contexts.md delete mode 100644 docs/log_contexts.rst create mode 100644 docs/media_repository.md delete mode 100644 docs/media_repository.rst create mode 100644 docs/metrics-howto.md delete mode 100644 docs/metrics-howto.rst create mode 100644 docs/opentracing.md delete mode 100644 docs/opentracing.rst create mode 100644 docs/password_auth_providers.md delete mode 100644 docs/password_auth_providers.rst create mode 100644 docs/postgres.md delete mode 100644 docs/postgres.rst create mode 100644 docs/replication.md delete mode 100644 docs/replication.rst create mode 100644 docs/reverse_proxy.md delete mode 100644 docs/reverse_proxy.rst create mode 100644 docs/tcp_replication.md delete mode 100644 docs/tcp_replication.rst create mode 100644 docs/turn-howto.md delete mode 100644 docs/turn-howto.rst create mode 100644 docs/workers.md delete mode 100644 docs/workers.rst (limited to 'synapse') diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 94dc650485..620dc88ce2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -56,7 +56,7 @@ Code style All Matrix projects have a well-defined code-style - and sometimes we've even got as far as documenting it... For instance, synapse's code style doc lives -at https://github.com/matrix-org/synapse/tree/master/docs/code_style.rst. 
+at https://github.com/matrix-org/synapse/tree/master/docs/code_style.md. Please ensure your changes match the cosmetic style of the existing project, and **never** mix cosmetic and functional changes in the same commit, as it diff --git a/INSTALL.md b/INSTALL.md index 6bce370ea8..3eb979c362 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -373,7 +373,7 @@ is suitable for local testing, but for any practical use, you will either need to enable a reverse proxy, or configure Synapse to expose an HTTPS port. For information on using a reverse proxy, see -[docs/reverse_proxy.rst](docs/reverse_proxy.rst). +[docs/reverse_proxy.md](docs/reverse_proxy.md). To configure Synapse to expose an HTTPS port, you will need to edit `homeserver.yaml`, as follows: @@ -446,7 +446,7 @@ on your server even if `enable_registration` is `false`. ## Setting up a TURN server For reliable VoIP calls to be routed via this homeserver, you MUST configure -a TURN server. See [docs/turn-howto.rst](docs/turn-howto.rst) for details. +a TURN server. See [docs/turn-howto.md](docs/turn-howto.md) for details. ## URL previews diff --git a/README.rst b/README.rst index bbff8de5ab..2948fd0765 100644 --- a/README.rst +++ b/README.rst @@ -115,7 +115,7 @@ Registering a new user from a client By default, registration of new users via Matrix clients is disabled. To enable it, specify ``enable_registration: true`` in ``homeserver.yaml``. (It is then -recommended to also set up CAPTCHA - see ``_.) +recommended to also set up CAPTCHA - see ``_.) Once ``enable_registration`` is set to ``true``, it is possible to register a user via `riot.im `_ or other Matrix clients. @@ -186,7 +186,7 @@ Almost all installations should opt to use PostreSQL. Advantages include: synapse itself. For information on how to install and use PostgreSQL, please see -`docs/postgres.rst `_. +`docs/postgres.md `_. .. _reverse-proxy: @@ -201,7 +201,7 @@ It is recommended to put a reverse proxy such as doing so is that it means that you can expose the default https port (443) to Matrix clients without needing to run Synapse with root privileges. -For information on configuring one, see ``_. +For information on configuring one, see ``_. Identity Servers ================ diff --git a/UPGRADE.rst b/UPGRADE.rst index dddcd75fda..5aaf804902 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -103,7 +103,7 @@ Upgrading to v1.2.0 =================== Some counter metrics have been renamed, with the old names deprecated. See -`the metrics documentation `_ +`the metrics documentation `_ for details. Upgrading to v1.1.0 diff --git a/changelog.d/5849.doc b/changelog.d/5849.doc new file mode 100644 index 0000000000..fbe62e8633 --- /dev/null +++ b/changelog.d/5849.doc @@ -0,0 +1 @@ +Convert documentation to markdown (from rst) diff --git a/docs/CAPTCHA_SETUP.md b/docs/CAPTCHA_SETUP.md new file mode 100644 index 0000000000..5f9057530b --- /dev/null +++ b/docs/CAPTCHA_SETUP.md @@ -0,0 +1,31 @@ +# Overview +Captcha can be enabled for this home server. This file explains how to do that. +The captcha mechanism used is Google's ReCaptcha. This requires API keys from Google. + +## Getting keys + +Requires a public/private key pair from: + + + +Must be a reCAPTCHA v2 key using the "I'm not a robot" Checkbox option + +## Setting ReCaptcha Keys + +The keys are a config option on the home server config. If they are not +visible, you can generate them via `--generate-config`. 
Set the following value: + + recaptcha_public_key: YOUR_PUBLIC_KEY + recaptcha_private_key: YOUR_PRIVATE_KEY + +In addition, you MUST enable captchas via: + + enable_registration_captcha: true + +## Configuring IP used for auth + +The ReCaptcha API requires that the IP address of the user who solved the +captcha is sent. If the client is connecting through a proxy or load balancer, +it may be required to use the `X-Forwarded-For` (XFF) header instead of the origin +IP address. This can be configured using the `x_forwarded` directive in the +listeners section of the homeserver.yaml configuration file. diff --git a/docs/CAPTCHA_SETUP.rst b/docs/CAPTCHA_SETUP.rst deleted file mode 100644 index 0c22ee4ff6..0000000000 --- a/docs/CAPTCHA_SETUP.rst +++ /dev/null @@ -1,30 +0,0 @@ -Captcha can be enabled for this home server. This file explains how to do that. -The captcha mechanism used is Google's ReCaptcha. This requires API keys from Google. - -Getting keys ------------- -Requires a public/private key pair from: - -https://developers.google.com/recaptcha/ - -Must be a reCAPTCHA v2 key using the "I'm not a robot" Checkbox option - -Setting ReCaptcha Keys ----------------------- -The keys are a config option on the home server config. If they are not -visible, you can generate them via --generate-config. Set the following value:: - - recaptcha_public_key: YOUR_PUBLIC_KEY - recaptcha_private_key: YOUR_PRIVATE_KEY - -In addition, you MUST enable captchas via:: - - enable_registration_captcha: true - -Configuring IP used for auth ----------------------------- -The ReCaptcha API requires that the IP address of the user who solved the -captcha is sent. If the client is connecting through a proxy or load balancer, -it may be required to use the X-Forwarded-For (XFF) header instead of the origin -IP address. This can be configured using the x_forwarded directive in the -listeners section of the homeserver.yaml configuration file. diff --git a/docs/MSC1711_certificates_FAQ.md b/docs/MSC1711_certificates_FAQ.md index 83497380df..80bd1294c7 100644 --- a/docs/MSC1711_certificates_FAQ.md +++ b/docs/MSC1711_certificates_FAQ.md @@ -147,7 +147,7 @@ your domain, you can simply route all traffic through the reverse proxy by updating the SRV record appropriately (or removing it, if the proxy listens on 8448). -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. #### Option 3: add a .well-known file to delegate your matrix traffic @@ -319,7 +319,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will find it easier to direct federation traffic to a reverse proxy and manage their own TLS certificates, and this is a supported configuration. -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. ### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy? diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000..3c6ea48c66 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,7 @@ +# Synapse Documentation + +This directory contains documentation specific to the `synapse` homeserver. 
+ +All matrix-generic documentation now lives in its own project, located at [matrix-org/matrix-doc](https://github.com/matrix-org/matrix-doc) + +(Note: some items here may be moved to [matrix-org/matrix-doc](https://github.com/matrix-org/matrix-doc) at some point in the future.) diff --git a/docs/README.rst b/docs/README.rst deleted file mode 100644 index 3012da8b19..0000000000 --- a/docs/README.rst +++ /dev/null @@ -1,6 +0,0 @@ -All matrix-generic documentation now lives in its own project at - -github.com/matrix-org/matrix-doc.git - -Only Synapse implementation-specific documentation lives here now -(together with some older stuff will be shortly migrated over to matrix-doc) diff --git a/docs/ancient_architecture_notes.md b/docs/ancient_architecture_notes.md new file mode 100644 index 0000000000..3ea8976cc7 --- /dev/null +++ b/docs/ancient_architecture_notes.md @@ -0,0 +1,81 @@ +> **Warning** +> These architecture notes are spectacularly old, and date back +> to when Synapse was just federation code in isolation. This should be +> merged into the main spec. + +# Server to Server + +## Server to Server Stack + +To use the server to server stack, home servers should only need to +interact with the Messaging layer. + +The server to server side of things is designed into 4 distinct layers: + +1. Messaging Layer +2. Pdu Layer +3. Transaction Layer +4. Transport Layer + +Where the bottom (the transport layer) is what talks to the internet via +HTTP, and the top (the messaging layer) talks to the rest of the Home +Server with a domain specific API. + +1. **Messaging Layer** + + This is what the rest of the Home Server hits to send messages, join rooms, + etc. It also allows you to register callbacks for when it get's notified by + lower levels that e.g. a new message has been received. + + It is responsible for serializing requests to send to the data + layer, and to parse requests received from the data layer. + +2. **PDU Layer** + + This layer handles: + + - duplicate `pdu_id`'s - i.e., it makes sure we ignore them. + - responding to requests for a given `pdu_id` + - responding to requests for all metadata for a given context (i.e. room) + - handling incoming backfill requests + + So it has to parse incoming messages to discover which are metadata and + which aren't, and has to correctly clobber existing metadata where + appropriate. + + For incoming PDUs, it has to check the PDUs it references to see + if we have missed any. If we have go and ask someone (another + home server) for it. + +3. **Transaction Layer** + + This layer makes incoming requests idempotent. i.e., it stores + which transaction id's we have seen and what our response were. + If we have already seen a message with the given transaction id, + we do not notify higher levels but simply respond with the + previous response. + + `transaction_id` is from "`GET /send//`" + + It's also responsible for batching PDUs into single transaction for + sending to remote destinations, so that we only ever have one + transaction in flight to a given destination at any one time. + + This is also responsible for answering requests for things after a + given set of transactions, i.e., ask for everything after 'ver' X. + +4. **Transport Layer** + + This is responsible for starting a HTTP server and hitting the + correct callbacks on the Transaction layer, as well as sending + both data and requests for data. + +## Persistence + +We persist things in a single sqlite3 database. All database queries get +run on a separate, dedicated thread. 
This that we only ever have one +query running at a time, making it a lot easier to do things in a safe +manner. + +The queries are located in the `synapse.persistence.transactions` module, +and the table information in the `synapse.persistence.tables` module. diff --git a/docs/ancient_architecture_notes.rst b/docs/ancient_architecture_notes.rst deleted file mode 100644 index 2a5a2613c4..0000000000 --- a/docs/ancient_architecture_notes.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. WARNING:: - These architecture notes are spectacularly old, and date back to when Synapse - was just federation code in isolation. This should be merged into the main - spec. - - -= Server to Server = - -== Server to Server Stack == - -To use the server to server stack, home servers should only need to interact with the Messaging layer. - -The server to server side of things is designed into 4 distinct layers: - - 1. Messaging Layer - 2. Pdu Layer - 3. Transaction Layer - 4. Transport Layer - -Where the bottom (the transport layer) is what talks to the internet via HTTP, and the top (the messaging layer) talks to the rest of the Home Server with a domain specific API. - -1. Messaging Layer - This is what the rest of the Home Server hits to send messages, join rooms, etc. It also allows you to register callbacks for when it get's notified by lower levels that e.g. a new message has been received. - - It is responsible for serializing requests to send to the data layer, and to parse requests received from the data layer. - - -2. PDU Layer - This layer handles: - * duplicate pdu_id's - i.e., it makes sure we ignore them. - * responding to requests for a given pdu_id - * responding to requests for all metadata for a given context (i.e. room) - * handling incoming backfill requests - - So it has to parse incoming messages to discover which are metadata and which aren't, and has to correctly clobber existing metadata where appropriate. - - For incoming PDUs, it has to check the PDUs it references to see if we have missed any. If we have go and ask someone (another home server) for it. - - -3. Transaction Layer - This layer makes incoming requests idempotent. I.e., it stores which transaction id's we have seen and what our response were. If we have already seen a message with the given transaction id, we do not notify higher levels but simply respond with the previous response. - -transaction_id is from "GET /send//" - - It's also responsible for batching PDUs into single transaction for sending to remote destinations, so that we only ever have one transaction in flight to a given destination at any one time. - - This is also responsible for answering requests for things after a given set of transactions, i.e., ask for everything after 'ver' X. - - -4. Transport Layer - This is responsible for starting a HTTP server and hitting the correct callbacks on the Transaction layer, as well as sending both data and requests for data. - - -== Persistence == - -We persist things in a single sqlite3 database. All database queries get run on a separate, dedicated thread. This that we only ever have one query running at a time, making it a lot easier to do things in a safe manner. - -The queries are located in the synapse.persistence.transactions module, and the table information in the synapse.persistence.tables module. 
- diff --git a/docs/application_services.md b/docs/application_services.md new file mode 100644 index 0000000000..06cb79f1f9 --- /dev/null +++ b/docs/application_services.md @@ -0,0 +1,31 @@ +# Registering an Application Service + +The registration of new application services depends on the homeserver used. +In synapse, you need to create a new configuration file for your AS and add it +to the list specified under the `app_service_config_files` config +option in your synapse config. + +For example: + +```yaml +app_service_config_files: +- /home/matrix/.synapse/.yaml +``` + +The format of the AS configuration file is as follows: + +```yaml +url: +as_token: +hs_token: +sender_localpart: +namespaces: + users: # List of users we're interested in + - exclusive: + regex: + - ... + aliases: [] # List of aliases we're interested in + rooms: [] # List of room ids we're interested in +``` + +See the [spec](https://matrix.org/docs/spec/application_service/unstable.html) for further details on how application services work. diff --git a/docs/application_services.rst b/docs/application_services.rst deleted file mode 100644 index fbc0c7e960..0000000000 --- a/docs/application_services.rst +++ /dev/null @@ -1,35 +0,0 @@ -Registering an Application Service -================================== - -The registration of new application services depends on the homeserver used. -In synapse, you need to create a new configuration file for your AS and add it -to the list specified under the ``app_service_config_files`` config -option in your synapse config. - -For example: - -.. code-block:: yaml - - app_service_config_files: - - /home/matrix/.synapse/.yaml - - -The format of the AS configuration file is as follows: - -.. code-block:: yaml - - url: - as_token: - hs_token: - sender_localpart: - namespaces: - users: # List of users we're interested in - - exclusive: - regex: - - ... - aliases: [] # List of aliases we're interested in - rooms: [] # List of room ids we're interested in - -See the spec_ for further details on how application services work. - -.. _spec: https://matrix.org/docs/spec/application_service/unstable.html diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000000..0c7f315f3f --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,65 @@ +# Synapse Architecture + +As of the end of Oct 2014, Synapse's overall architecture looks like: + + synapse + .-----------------------------------------------------. + | Notifier | + | ^ | | + | | | | + | .------------|------. | + | | handlers/ | | | + | | v | | + | | Event*Handler <--------> rest/* <=> Client + | | Rooms*Handler | | + HS <=> federation/* <==> FederationHandler | | + | | | PresenceHandler | | + | | | TypingHandler | | + | | '-------------------' | + | | | | | + | | state/* | | + | | | | | + | | v v | + | `--------------> storage/* | + | | | + '--------------------------|--------------------------' + v + .----. + | DB | + '----' + +- Handlers: business logic of synapse itself. Follows a set contract of BaseHandler: + - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic): + - handle_state(event) + - auth(event) + - persist_event(event) + - notify notifier or federation(event) + - PresenceHandler: use distributor to get EDUs out of Federation. + Very lightweight logic built on the distributor + - TypingHandler: use distributor to get EDUs out of Federation. + Very lightweight logic built on the distributor + - EventsHandler: handles the events stream... 
+ - FederationHandler: - gets PDU from Federation Layer; turns into + an event; follows basehandler functionality. + - RoomsHandler: does all the room logic, including members - lots + of classes in RoomsHandler. + - ProfileHandler: talks to the storage to store/retrieve profile + info. +- EventFactory: generates events of particular event types. +- Notifier: Backs the events handler +- REST: Interfaces handlers and events to the outside world via + HTTP/JSON. Converts events back and forth from JSON. +- Federation: holds the HTTP client & server to talk to other servers. + Does replication to make sure there's nothing missing in the graph. + Handles reliability. Handles txns. +- Distributor: generic event bus. used for presence & typing only + currently. Notifier could be implemented using Distributor - so far + we are only using for things which actually /require/ dynamic + pluggability however as it can obfuscate the actual flow of control. +- Auth: helper singleton to say whether a given event is allowed to do + a given thing (TODO: put this on the diagram) +- State: helper singleton: does state conflict resolution. You give it + an event and it tells you if it actually updates the state or not, + and annotates the event up properly and handles merge conflict + resolution. +- Storage: abstracts the storage engine. diff --git a/docs/architecture.rst b/docs/architecture.rst deleted file mode 100644 index 98050428b9..0000000000 --- a/docs/architecture.rst +++ /dev/null @@ -1,68 +0,0 @@ -Synapse Architecture -==================== - -As of the end of Oct 2014, Synapse's overall architecture looks like:: - - synapse - .-----------------------------------------------------. - | Notifier | - | ^ | | - | | | | - | .------------|------. | - | | handlers/ | | | - | | v | | - | | Event*Handler <--------> rest/* <=> Client - | | Rooms*Handler | | - HSes <=> federation/* <==> FederationHandler | | - | | | PresenceHandler | | - | | | TypingHandler | | - | | '-------------------' | - | | | | | - | | state/* | | - | | | | | - | | v v | - | `--------------> storage/* | - | | | - '--------------------------|--------------------------' - v - .----. - | DB | - '----' - -* Handlers: business logic of synapse itself. Follows a set contract of BaseHandler: - - - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic): - - + handle_state(event) - + auth(event) - + persist_event(event) - + notify notifier or federation(event) - - - PresenceHandler: use distributor to get EDUs out of Federation. Very - lightweight logic built on the distributor - - TypingHandler: use distributor to get EDUs out of Federation. Very - lightweight logic built on the distributor - - EventsHandler: handles the events stream... - - FederationHandler: - gets PDU from Federation Layer; turns into an event; - follows basehandler functionality. - - RoomsHandler: does all the room logic, including members - lots of classes in - RoomsHandler. - - ProfileHandler: talks to the storage to store/retrieve profile info. - -* EventFactory: generates events of particular event types. -* Notifier: Backs the events handler -* REST: Interfaces handlers and events to the outside world via HTTP/JSON. - Converts events back and forth from JSON. -* Federation: holds the HTTP client & server to talk to other servers. Does - replication to make sure there's nothing missing in the graph. Handles - reliability. Handles txns. -* Distributor: generic event bus. used for presence & typing only currently. 
- Notifier could be implemented using Distributor - so far we are only using for - things which actually /require/ dynamic pluggability however as it can - obfuscate the actual flow of control. -* Auth: helper singleton to say whether a given event is allowed to do a given - thing (TODO: put this on the diagram) -* State: helper singleton: does state conflict resolution. You give it an event - and it tells you if it actually updates the state or not, and annotates the - event up properly and handles merge conflict resolution. -* Storage: abstracts the storage engine. diff --git a/docs/code_style.md b/docs/code_style.md new file mode 100644 index 0000000000..f983f72d6c --- /dev/null +++ b/docs/code_style.md @@ -0,0 +1,169 @@ +# Code Style + +## Formatting tools + +The Synapse codebase uses a number of code formatting tools in order to +quickly and automatically check for formatting (and sometimes logical) +errors in code. + +The necessary tools are detailed below. + +- **black** + + The Synapse codebase uses [black](https://pypi.org/project/black/) + as an opinionated code formatter, ensuring all comitted code is + properly formatted. + + First install `black` with: + + pip install --upgrade black + + Have `black` auto-format your code (it shouldn't change any + functionality) with: + + black . --exclude="\.tox|build|env" + +- **flake8** + + `flake8` is a code checking tool. We require code to pass `flake8` + before being merged into the codebase. + + Install `flake8` with: + + pip install --upgrade flake8 + + Check all application and test code with: + + flake8 synapse tests + +- **isort** + + `isort` ensures imports are nicely formatted, and can suggest and + auto-fix issues such as double-importing. + + Install `isort` with: + + pip install --upgrade isort + + Auto-fix imports with: + + isort -rc synapse tests + + `-rc` means to recursively search the given directories. + +It's worth noting that modern IDEs and text editors can run these tools +automatically on save. It may be worth looking into whether this +functionality is supported in your editor for a more convenient +development workflow. It is not, however, recommended to run `flake8` on +save as it takes a while and is very resource intensive. + +## General rules + +- **Naming**: + - Use camel case for class and type names + - Use underscores for functions and variables. +- **Docstrings**: should follow the [google code + style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings). + This is so that we can generate documentation with + [sphinx](http://sphinxcontrib-napoleon.readthedocs.org/en/latest/). + See the + [examples](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) + in the sphinx documentation. +- **Imports**: + - Imports should be sorted by `isort` as described above. + - Prefer to import classes and functions rather than packages or + modules. + + Example: + + from synapse.types import UserID + ... + user_id = UserID(local, server) + + is preferred over: + + from synapse import types + ... + user_id = types.UserID(local, server) + + (or any other variant). + + This goes against the advice in the Google style guide, but it + means that errors in the name are caught early (at import time). + + - Avoid wildcard imports (`from synapse.types import *`) and + relative imports (`from .types import UserID`). + +## Configuration file format + +The [sample configuration file](./sample_config.yaml) acts as a +reference to Synapse's configuration options for server administrators. 
+Remember that many readers will be unfamiliar with YAML and server +administration in general, so that it is important that the file be as +easy to understand as possible, which includes following a consistent +format. + +Some guidelines follow: + +- Sections should be separated with a heading consisting of a single + line prefixed and suffixed with `##`. There should be **two** blank + lines before the section header, and **one** after. +- Each option should be listed in the file with the following format: + - A comment describing the setting. Each line of this comment + should be prefixed with a hash (`#`) and a space. + + The comment should describe the default behaviour (ie, what + happens if the setting is omitted), as well as what the effect + will be if the setting is changed. + + Often, the comment end with something like "uncomment the + following to ". + + - A line consisting of only `#`. + - A commented-out example setting, prefixed with only `#`. + + For boolean (on/off) options, convention is that this example + should be the *opposite* to the default (so the comment will end + with "Uncomment the following to enable [or disable] + ." For other options, the example should give some + non-default value which is likely to be useful to the reader. + +- There should be a blank line between each option. +- Where several settings are grouped into a single dict, *avoid* the + convention where the whole block is commented out, resulting in + comment lines starting `# #`, as this is hard to read and confusing + to edit. Instead, leave the top-level config option uncommented, and + follow the conventions above for sub-options. Ensure that your code + correctly handles the top-level option being set to `None` (as it + will be if no sub-options are enabled). +- Lines should be wrapped at 80 characters. + +Example: + + ## Frobnication ## + + # The frobnicator will ensure that all requests are fully frobnicated. + # To enable it, uncomment the following. + # + #frobnicator_enabled: true + + # By default, the frobnicator will frobnicate with the default frobber. + # The following will make it use an alternative frobber. + # + #frobincator_frobber: special_frobber + + # Settings for the frobber + # + frobber: + # frobbing speed. Defaults to 1. + # + #speed: 10 + + # frobbing distance. Defaults to 1000. + # + #distance: 100 + +Note that the sample configuration is generated from the synapse code +and is maintained by a script, `scripts-dev/generate_sample_config`. +Making sure that the output from this script matches the desired format +is left as an exercise for the reader! diff --git a/docs/code_style.rst b/docs/code_style.rst deleted file mode 100644 index 39ac4ebedc..0000000000 --- a/docs/code_style.rst +++ /dev/null @@ -1,180 +0,0 @@ -Code Style -========== - -Formatting tools ----------------- - -The Synapse codebase uses a number of code formatting tools in order to -quickly and automatically check for formatting (and sometimes logical) errors -in code. - -The necessary tools are detailed below. - -- **black** - - The Synapse codebase uses `black `_ as an - opinionated code formatter, ensuring all comitted code is properly - formatted. - - First install ``black`` with:: - - pip install --upgrade black - - Have ``black`` auto-format your code (it shouldn't change any functionality) - with:: - - black . --exclude="\.tox|build|env" - -- **flake8** - - ``flake8`` is a code checking tool. We require code to pass ``flake8`` before being merged into the codebase. 
- - Install ``flake8`` with:: - - pip install --upgrade flake8 - - Check all application and test code with:: - - flake8 synapse tests - -- **isort** - - ``isort`` ensures imports are nicely formatted, and can suggest and - auto-fix issues such as double-importing. - - Install ``isort`` with:: - - pip install --upgrade isort - - Auto-fix imports with:: - - isort -rc synapse tests - - ``-rc`` means to recursively search the given directories. - -It's worth noting that modern IDEs and text editors can run these tools -automatically on save. It may be worth looking into whether this -functionality is supported in your editor for a more convenient development -workflow. It is not, however, recommended to run ``flake8`` on save as it -takes a while and is very resource intensive. - -General rules -------------- - -- **Naming**: - - - Use camel case for class and type names - - Use underscores for functions and variables. - -- **Docstrings**: should follow the `google code style - `_. - This is so that we can generate documentation with `sphinx - `_. See the - `examples - `_ - in the sphinx documentation. - -- **Imports**: - - - Imports should be sorted by ``isort`` as described above. - - - Prefer to import classes and functions rather than packages or modules. - - Example:: - - from synapse.types import UserID - ... - user_id = UserID(local, server) - - is preferred over:: - - from synapse import types - ... - user_id = types.UserID(local, server) - - (or any other variant). - - This goes against the advice in the Google style guide, but it means that - errors in the name are caught early (at import time). - - - Avoid wildcard imports (``from synapse.types import *``) and relative - imports (``from .types import UserID``). - -Configuration file format -------------------------- - -The `sample configuration file <./sample_config.yaml>`_ acts as a reference to -Synapse's configuration options for server administrators. Remember that many -readers will be unfamiliar with YAML and server administration in general, so -that it is important that the file be as easy to understand as possible, which -includes following a consistent format. - -Some guidelines follow: - -* Sections should be separated with a heading consisting of a single line - prefixed and suffixed with ``##``. There should be **two** blank lines - before the section header, and **one** after. - -* Each option should be listed in the file with the following format: - - * A comment describing the setting. Each line of this comment should be - prefixed with a hash (``#``) and a space. - - The comment should describe the default behaviour (ie, what happens if - the setting is omitted), as well as what the effect will be if the - setting is changed. - - Often, the comment end with something like "uncomment the - following to \". - - * A line consisting of only ``#``. - - * A commented-out example setting, prefixed with only ``#``. - - For boolean (on/off) options, convention is that this example should be - the *opposite* to the default (so the comment will end with "Uncomment - the following to enable [or disable] \." For other options, - the example should give some non-default value which is likely to be - useful to the reader. - -* There should be a blank line between each option. - -* Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in comment - lines starting ``# #``, as this is hard to read and confusing to - edit. 
Instead, leave the top-level config option uncommented, and follow - the conventions above for sub-options. Ensure that your code correctly - handles the top-level option being set to ``None`` (as it will be if no - sub-options are enabled). - -* Lines should be wrapped at 80 characters. - -Example:: - - ## Frobnication ## - - # The frobnicator will ensure that all requests are fully frobnicated. - # To enable it, uncomment the following. - # - #frobnicator_enabled: true - - # By default, the frobnicator will frobnicate with the default frobber. - # The following will make it use an alternative frobber. - # - #frobincator_frobber: special_frobber - - # Settings for the frobber - # - frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 - -Note that the sample configuration is generated from the synapse code and is -maintained by a script, ``scripts-dev/generate_sample_config``. Making sure -that the output from this script matches the desired format is left as an -exercise for the reader! diff --git a/docs/federate.md b/docs/federate.md index 6d6bb85e15..193e2d2dfe 100644 --- a/docs/federate.md +++ b/docs/federate.md @@ -148,7 +148,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will find it easier to direct federation traffic to a reverse proxy and manage their own TLS certificates, and this is a supported configuration. -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. #### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy? @@ -184,7 +184,7 @@ a complicated dance which requires connections in both directions). Another common problem is that people on other servers can't join rooms that you invite them to. This can be caused by an incorrectly-configured reverse -proxy: see [reverse_proxy.rst]() for instructions on how to correctly +proxy: see [reverse_proxy.md]() for instructions on how to correctly configure a reverse proxy. ## Running a Demo Federation of Synapses diff --git a/docs/log_contexts.md b/docs/log_contexts.md new file mode 100644 index 0000000000..5331e8c88b --- /dev/null +++ b/docs/log_contexts.md @@ -0,0 +1,494 @@ +# Log Contexts + +To help track the processing of individual requests, synapse uses a +'`log context`' to track which request it is handling at any given +moment. This is done via a thread-local variable; a `logging.Filter` is +then used to fish the information back out of the thread-local variable +and add it to each log record. + +Logcontexts are also used for CPU and database accounting, so that we +can track which requests were responsible for high CPU use or database +activity. + +The `synapse.logging.context` module provides a facilities for managing +the current log context (as well as providing the `LoggingContextFilter` +class). + +Deferreds make the whole thing complicated, so this document describes +how it all works, and how to write code which follows the rules. + +##Logcontexts without Deferreds + +In the absence of any Deferred voodoo, things are simple enough. 
As with +any code of this nature, the rule is that our function should leave +things as it found them: + +```python +from synapse.logging import context # omitted from future snippets + +def handle_request(request_id): + request_context = context.LoggingContext() + + calling_context = context.LoggingContext.current_context() + context.LoggingContext.set_current_context(request_context) + try: + request_context.request = request_id + do_request_handling() + logger.debug("finished") + finally: + context.LoggingContext.set_current_context(calling_context) + +def do_request_handling(): + logger.debug("phew") # this will be logged against request_id +``` + +LoggingContext implements the context management methods, so the above +can be written much more succinctly as: + +```python +def handle_request(request_id): + with context.LoggingContext() as request_context: + request_context.request = request_id + do_request_handling() + logger.debug("finished") + +def do_request_handling(): + logger.debug("phew") +``` + +## Using logcontexts with Deferreds + +Deferreds --- and in particular, `defer.inlineCallbacks` --- break the +linear flow of code so that there is no longer a single entry point +where we should set the logcontext and a single exit point where we +should remove it. + +Consider the example above, where `do_request_handling` needs to do some +blocking operation, and returns a deferred: + +```python +@defer.inlineCallbacks +def handle_request(request_id): + with context.LoggingContext() as request_context: + request_context.request = request_id + yield do_request_handling() + logger.debug("finished") +``` + +In the above flow: + +- The logcontext is set +- `do_request_handling` is called, and returns a deferred +- `handle_request` yields the deferred +- The `inlineCallbacks` wrapper of `handle_request` returns a deferred + +So we have stopped processing the request (and will probably go on to +start processing the next), without clearing the logcontext. + +To circumvent this problem, synapse code assumes that, wherever you have +a deferred, you will want to yield on it. To that end, whereever +functions return a deferred, we adopt the following conventions: + +**Rules for functions returning deferreds:** + +> - If the deferred is already complete, the function returns with the +> same logcontext it started with. +> - If the deferred is incomplete, the function clears the logcontext +> before returning; when the deferred completes, it restores the +> logcontext before running any callbacks. + +That sounds complicated, but actually it means a lot of code (including +the example above) "just works". There are two cases: + +- If `do_request_handling` returns a completed deferred, then the + logcontext will still be in place. In this case, execution will + continue immediately after the `yield`; the "finished" line will + be logged against the right context, and the `with` block restores + the original context before we return to the caller. +- If the returned deferred is incomplete, `do_request_handling` clears + the logcontext before returning. The logcontext is therefore clear + when `handle_request` yields the deferred. At that point, the + `inlineCallbacks` wrapper adds a callback to the deferred, and + returns another (incomplete) deferred to the caller, and it is safe + to begin processing the next request. + + Once `do_request_handling`'s deferred completes, it will reinstate + the logcontext, before running the callback added by the + `inlineCallbacks` wrapper. 
That callback runs the second half of + `handle_request`, so again the "finished" line will be logged + against the right context, and the `with` block restores the + original context. + +As an aside, it's worth noting that `handle_request` follows our rules +-though that only matters if the caller has its own logcontext which it +cares about. + +The following sections describe pitfalls and helpful patterns when +implementing these rules. + +Always yield your deferreds +--------------------------- + +Whenever you get a deferred back from a function, you should `yield` on +it as soon as possible. (Returning it directly to your caller is ok too, +if you're not doing `inlineCallbacks`.) Do not pass go; do not do any +logging; do not call any other functions. + +```python +@defer.inlineCallbacks +def fun(): + logger.debug("starting") + yield do_some_stuff() # just like this + + d = more_stuff() + result = yield d # also fine, of course + + return result + +def nonInlineCallbacksFun(): + logger.debug("just a wrapper really") + return do_some_stuff() # this is ok too - the caller will yield on + # it anyway. +``` + +Provided this pattern is followed all the way back up to the callchain +to where the logcontext was set, this will make things work out ok: +provided `do_some_stuff` and `more_stuff` follow the rules above, then +so will `fun` (as wrapped by `inlineCallbacks`) and +`nonInlineCallbacksFun`. + +It's all too easy to forget to `yield`: for instance if we forgot that +`do_some_stuff` returned a deferred, we might plough on regardless. This +leads to a mess; it will probably work itself out eventually, but not +before a load of stuff has been logged against the wrong context. +(Normally, other things will break, more obviously, if you forget to +`yield`, so this tends not to be a major problem in practice.) + +Of course sometimes you need to do something a bit fancier with your +Deferreds - not all code follows the linear A-then-B-then-C pattern. +Notes on implementing more complex patterns are in later sections. + +## Where you create a new Deferred, make it follow the rules + +Most of the time, a Deferred comes from another synapse function. +Sometimes, though, we need to make up a new Deferred, or we get a +Deferred back from external code. We need to make it follow our rules. + +The easy way to do it is with a combination of `defer.inlineCallbacks`, +and `context.PreserveLoggingContext`. Suppose we want to implement +`sleep`, which returns a deferred which will run its callbacks after a +given number of seconds. That might look like: + +```python +# not a logcontext-rules-compliant function +def get_sleep_deferred(seconds): + d = defer.Deferred() + reactor.callLater(seconds, d.callback, None) + return d +``` + +That doesn't follow the rules, but we can fix it by wrapping it with +`PreserveLoggingContext` and `yield` ing on it: + +```python +@defer.inlineCallbacks +def sleep(seconds): + with PreserveLoggingContext(): + yield get_sleep_deferred(seconds) +``` + +This technique works equally for external functions which return +deferreds, or deferreds we have made ourselves. + +You can also use `context.make_deferred_yieldable`, which just does the +boilerplate for you, so the above could be written: + +```python +def sleep(seconds): + return context.make_deferred_yieldable(get_sleep_deferred(seconds)) +``` + +## Fire-and-forget + +Sometimes you want to fire off a chain of execution, but not wait for +its result. 
That might look a bit like this: + +```python +@defer.inlineCallbacks +def do_request_handling(): + yield foreground_operation() + + # *don't* do this + background_operation() + + logger.debug("Request handling complete") + +@defer.inlineCallbacks +def background_operation(): + yield first_background_step() + logger.debug("Completed first step") + yield second_background_step() + logger.debug("Completed second step") +``` + +The above code does a couple of steps in the background after +`do_request_handling` has finished. The log lines are still logged +against the `request_context` logcontext, which may or may not be +desirable. There are two big problems with the above, however. The first +problem is that, if `background_operation` returns an incomplete +Deferred, it will expect its caller to `yield` immediately, so will have +cleared the logcontext. In this example, that means that 'Request +handling complete' will be logged without any context. + +The second problem, which is potentially even worse, is that when the +Deferred returned by `background_operation` completes, it will restore +the original logcontext. There is nothing waiting on that Deferred, so +the logcontext will leak into the reactor and possibly get attached to +some arbitrary future operation. + +There are two potential solutions to this. + +One option is to surround the call to `background_operation` with a +`PreserveLoggingContext` call. That will reset the logcontext before +starting `background_operation` (so the context restored when the +deferred completes will be the empty logcontext), and will restore the +current logcontext before continuing the foreground process: + +```python +@defer.inlineCallbacks +def do_request_handling(): + yield foreground_operation() + + # start background_operation off in the empty logcontext, to + # avoid leaking the current context into the reactor. + with PreserveLoggingContext(): + background_operation() + + # this will now be logged against the request context + logger.debug("Request handling complete") +``` + +Obviously that option means that the operations done in +`background_operation` would be not be logged against a logcontext +(though that might be fixed by setting a different logcontext via a +`with LoggingContext(...)` in `background_operation`). + +The second option is to use `context.run_in_background`, which wraps a +function so that it doesn't reset the logcontext even when it returns +an incomplete deferred, and adds a callback to the returned deferred to +reset the logcontext. In other words, it turns a function that follows +the Synapse rules about logcontexts and Deferreds into one which behaves +more like an external function --- the opposite operation to that +described in the previous section. It can be used like this: + +```python +@defer.inlineCallbacks +def do_request_handling(): + yield foreground_operation() + + context.run_in_background(background_operation) + + # this will now be logged against the request context + logger.debug("Request handling complete") +``` + +## Passing synapse deferreds into third-party functions + +A typical example of this is where we want to collect together two or +more deferred via `defer.gatherResults`: + +```python +d1 = operation1() +d2 = operation2() +d3 = defer.gatherResults([d1, d2]) +``` + +This is really a variation of the fire-and-forget problem above, in that +we are firing off `d1` and `d2` without yielding on them. The difference +is that we now have third-party code attached to their callbacks. 
Anyway +either technique given in the [Fire-and-forget](#fire-and-forget) +section will work. + +Of course, the new Deferred returned by `gatherResults` needs to be +wrapped in order to make it follow the logcontext rules before we can +yield it, as described in [Where you create a new Deferred, make it +follow the +rules](#where-you-create-a-new-deferred-make-it-follow-the-rules). + +So, option one: reset the logcontext before starting the operations to +be gathered: + +```python +@defer.inlineCallbacks +def do_request_handling(): + with PreserveLoggingContext(): + d1 = operation1() + d2 = operation2() + result = yield defer.gatherResults([d1, d2]) +``` + +In this case particularly, though, option two, of using +`context.preserve_fn` almost certainly makes more sense, so that +`operation1` and `operation2` are both logged against the original +logcontext. This looks like: + +```python +@defer.inlineCallbacks +def do_request_handling(): + d1 = context.preserve_fn(operation1)() + d2 = context.preserve_fn(operation2)() + + with PreserveLoggingContext(): + result = yield defer.gatherResults([d1, d2]) +``` + +## Was all this really necessary? + +The conventions used work fine for a linear flow where everything +happens in series via `defer.inlineCallbacks` and `yield`, but are +certainly tricky to follow for any more exotic flows. It's hard not to +wonder if we could have done something else. + +We're not going to rewrite Synapse now, so the following is entirely of +academic interest, but I'd like to record some thoughts on an +alternative approach. + +I briefly prototyped some code following an alternative set of rules. I +think it would work, but I certainly didn't get as far as thinking how +it would interact with concepts as complicated as the cache descriptors. + +My alternative rules were: + +- functions always preserve the logcontext of their caller, whether or + not they are returning a Deferred. +- Deferreds returned by synapse functions run their callbacks in the + same context as the function was orignally called in. + +The main point of this scheme is that everywhere that sets the +logcontext is responsible for clearing it before returning control to +the reactor. + +So, for example, if you were the function which started a +`with LoggingContext` block, you wouldn't `yield` within it --- instead +you'd start off the background process, and then leave the `with` block +to wait for it: + +```python +def handle_request(request_id): + with context.LoggingContext() as request_context: + request_context.request = request_id + d = do_request_handling() + + def cb(r): + logger.debug("finished") + + d.addCallback(cb) + return d +``` + +(in general, mixing `with LoggingContext` blocks and +`defer.inlineCallbacks` in the same function leads to slighly +counter-intuitive code, under this scheme). + +Because we leave the original `with` block as soon as the Deferred is +returned (as opposed to waiting for it to be resolved, as we do today), +the logcontext is cleared before control passes back to the reactor; so +if there is some code within `do_request_handling` which needs to wait +for a Deferred to complete, there is no need for it to worry about +clearing the logcontext before doing so: + +```python +def handle_request(): + r = do_some_stuff() + r.addCallback(do_some_more_stuff) + return r +``` + +--- and provided `do_some_stuff` follows the rules of returning a +Deferred which runs its callbacks in the original logcontext, all is +happy. 
+ +The business of a Deferred which runs its callbacks in the original +logcontext isn't hard to achieve --- we have it today, in the shape of +`context._PreservingContextDeferred`: + +```python +def do_some_stuff(): + deferred = do_some_io() + pcd = _PreservingContextDeferred(LoggingContext.current_context()) + deferred.chainDeferred(pcd) + return pcd +``` + +It turns out that, thanks to the way that Deferreds chain together, we +automatically get the property of a context-preserving deferred with +`defer.inlineCallbacks`, provided the final Defered the function +`yields` on has that property. So we can just write: + +```python +@defer.inlineCallbacks +def handle_request(): + yield do_some_stuff() + yield do_some_more_stuff() +``` + +To conclude: I think this scheme would have worked equally well, with +less danger of messing it up, and probably made some more esoteric code +easier to write. But again --- changing the conventions of the entire +Synapse codebase is not a sensible option for the marginal improvement +offered. + +## A note on garbage-collection of Deferred chains + +It turns out that our logcontext rules do not play nicely with Deferred +chains which get orphaned and garbage-collected. + +Imagine we have some code that looks like this: + +```python +listener_queue = [] + +def on_something_interesting(): + for d in listener_queue: + d.callback("foo") + +@defer.inlineCallbacks +def await_something_interesting(): + new_deferred = defer.Deferred() + listener_queue.append(new_deferred) + + with PreserveLoggingContext(): + yield new_deferred +``` + +Obviously, the idea here is that we have a bunch of things which are +waiting for an event. (It's just an example of the problem here, but a +relatively common one.) + +Now let's imagine two further things happen. First of all, whatever was +waiting for the interesting thing goes away. (Perhaps the request times +out, or something *even more* interesting happens.) + +Secondly, let's suppose that we decide that the interesting thing is +never going to happen, and we reset the listener queue: + +```python +def reset_listener_queue(): + listener_queue.clear() +``` + +So, both ends of the deferred chain have now dropped their references, +and the deferred chain is now orphaned, and will be garbage-collected at +some point. Note that `await_something_interesting` is a generator +function, and when Python garbage-collects generator functions, it gives +them a chance to clean up by making the `yield` raise a `GeneratorExit` +exception. In our case, that means that the `__exit__` handler of +`PreserveLoggingContext` will carefully restore the request context, but +there is now nothing waiting for its return, so the request context is +never cleared. + +To reiterate, this problem only arises when *both* ends of a deferred +chain are dropped. Dropping the the reference to a deferred you're +supposed to be calling is probably bad practice, so this doesn't +actually happen too much. Unfortunately, when it does happen, it will +lead to leaked logcontexts which are incredibly hard to track down. diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst deleted file mode 100644 index 4502cd9454..0000000000 --- a/docs/log_contexts.rst +++ /dev/null @@ -1,498 +0,0 @@ -Log Contexts -============ - -.. contents:: - -To help track the processing of individual requests, synapse uses a -'log context' to track which request it is handling at any given moment. 
This -is done via a thread-local variable; a ``logging.Filter`` is then used to fish -the information back out of the thread-local variable and add it to each log -record. - -Logcontexts are also used for CPU and database accounting, so that we can track -which requests were responsible for high CPU use or database activity. - -The ``synapse.logging.context`` module provides a facilities for managing the -current log context (as well as providing the ``LoggingContextFilter`` class). - -Deferreds make the whole thing complicated, so this document describes how it -all works, and how to write code which follows the rules. - -Logcontexts without Deferreds ------------------------------ - -In the absence of any Deferred voodoo, things are simple enough. As with any -code of this nature, the rule is that our function should leave things as it -found them: - -.. code:: python - - from synapse.logging import context # omitted from future snippets - - def handle_request(request_id): - request_context = context.LoggingContext() - - calling_context = context.LoggingContext.current_context() - context.LoggingContext.set_current_context(request_context) - try: - request_context.request = request_id - do_request_handling() - logger.debug("finished") - finally: - context.LoggingContext.set_current_context(calling_context) - - def do_request_handling(): - logger.debug("phew") # this will be logged against request_id - - -LoggingContext implements the context management methods, so the above can be -written much more succinctly as: - -.. code:: python - - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - do_request_handling() - logger.debug("finished") - - def do_request_handling(): - logger.debug("phew") - - -Using logcontexts with Deferreds --------------------------------- - -Deferreds — and in particular, ``defer.inlineCallbacks`` — break -the linear flow of code so that there is no longer a single entry point where -we should set the logcontext and a single exit point where we should remove it. - -Consider the example above, where ``do_request_handling`` needs to do some -blocking operation, and returns a deferred: - -.. code:: python - - @defer.inlineCallbacks - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - yield do_request_handling() - logger.debug("finished") - - -In the above flow: - -* The logcontext is set -* ``do_request_handling`` is called, and returns a deferred -* ``handle_request`` yields the deferred -* The ``inlineCallbacks`` wrapper of ``handle_request`` returns a deferred - -So we have stopped processing the request (and will probably go on to start -processing the next), without clearing the logcontext. - -To circumvent this problem, synapse code assumes that, wherever you have a -deferred, you will want to yield on it. To that end, whereever functions return -a deferred, we adopt the following conventions: - -**Rules for functions returning deferreds:** - - * If the deferred is already complete, the function returns with the same - logcontext it started with. - * If the deferred is incomplete, the function clears the logcontext before - returning; when the deferred completes, it restores the logcontext before - running any callbacks. - -That sounds complicated, but actually it means a lot of code (including the -example above) "just works". 
There are two cases: - -* If ``do_request_handling`` returns a completed deferred, then the logcontext - will still be in place. In this case, execution will continue immediately - after the ``yield``; the "finished" line will be logged against the right - context, and the ``with`` block restores the original context before we - return to the caller. - -* If the returned deferred is incomplete, ``do_request_handling`` clears the - logcontext before returning. The logcontext is therefore clear when - ``handle_request`` yields the deferred. At that point, the ``inlineCallbacks`` - wrapper adds a callback to the deferred, and returns another (incomplete) - deferred to the caller, and it is safe to begin processing the next request. - - Once ``do_request_handling``'s deferred completes, it will reinstate the - logcontext, before running the callback added by the ``inlineCallbacks`` - wrapper. That callback runs the second half of ``handle_request``, so again - the "finished" line will be logged against the right - context, and the ``with`` block restores the original context. - -As an aside, it's worth noting that ``handle_request`` follows our rules - -though that only matters if the caller has its own logcontext which it cares -about. - -The following sections describe pitfalls and helpful patterns when implementing -these rules. - -Always yield your deferreds ---------------------------- - -Whenever you get a deferred back from a function, you should ``yield`` on it -as soon as possible. (Returning it directly to your caller is ok too, if you're -not doing ``inlineCallbacks``.) Do not pass go; do not do any logging; do not -call any other functions. - -.. code:: python - - @defer.inlineCallbacks - def fun(): - logger.debug("starting") - yield do_some_stuff() # just like this - - d = more_stuff() - result = yield d # also fine, of course - - return result - - def nonInlineCallbacksFun(): - logger.debug("just a wrapper really") - return do_some_stuff() # this is ok too - the caller will yield on - # it anyway. - -Provided this pattern is followed all the way back up to the callchain to where -the logcontext was set, this will make things work out ok: provided -``do_some_stuff`` and ``more_stuff`` follow the rules above, then so will -``fun`` (as wrapped by ``inlineCallbacks``) and ``nonInlineCallbacksFun``. - -It's all too easy to forget to ``yield``: for instance if we forgot that -``do_some_stuff`` returned a deferred, we might plough on regardless. This -leads to a mess; it will probably work itself out eventually, but not before -a load of stuff has been logged against the wrong context. (Normally, other -things will break, more obviously, if you forget to ``yield``, so this tends -not to be a major problem in practice.) - -Of course sometimes you need to do something a bit fancier with your Deferreds -- not all code follows the linear A-then-B-then-C pattern. Notes on -implementing more complex patterns are in later sections. - -Where you create a new Deferred, make it follow the rules ---------------------------------------------------------- - -Most of the time, a Deferred comes from another synapse function. Sometimes, -though, we need to make up a new Deferred, or we get a Deferred back from -external code. We need to make it follow our rules. - -The easy way to do it is with a combination of ``defer.inlineCallbacks``, and -``context.PreserveLoggingContext``. 
Suppose we want to implement ``sleep``, -which returns a deferred which will run its callbacks after a given number of -seconds. That might look like: - -.. code:: python - - # not a logcontext-rules-compliant function - def get_sleep_deferred(seconds): - d = defer.Deferred() - reactor.callLater(seconds, d.callback, None) - return d - -That doesn't follow the rules, but we can fix it by wrapping it with -``PreserveLoggingContext`` and ``yield`` ing on it: - -.. code:: python - - @defer.inlineCallbacks - def sleep(seconds): - with PreserveLoggingContext(): - yield get_sleep_deferred(seconds) - -This technique works equally for external functions which return deferreds, -or deferreds we have made ourselves. - -You can also use ``context.make_deferred_yieldable``, which just does the -boilerplate for you, so the above could be written: - -.. code:: python - - def sleep(seconds): - return context.make_deferred_yieldable(get_sleep_deferred(seconds)) - - -Fire-and-forget ---------------- - -Sometimes you want to fire off a chain of execution, but not wait for its -result. That might look a bit like this: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - # *don't* do this - background_operation() - - logger.debug("Request handling complete") - - @defer.inlineCallbacks - def background_operation(): - yield first_background_step() - logger.debug("Completed first step") - yield second_background_step() - logger.debug("Completed second step") - -The above code does a couple of steps in the background after -``do_request_handling`` has finished. The log lines are still logged against -the ``request_context`` logcontext, which may or may not be desirable. There -are two big problems with the above, however. The first problem is that, if -``background_operation`` returns an incomplete Deferred, it will expect its -caller to ``yield`` immediately, so will have cleared the logcontext. In this -example, that means that 'Request handling complete' will be logged without any -context. - -The second problem, which is potentially even worse, is that when the Deferred -returned by ``background_operation`` completes, it will restore the original -logcontext. There is nothing waiting on that Deferred, so the logcontext will -leak into the reactor and possibly get attached to some arbitrary future -operation. - -There are two potential solutions to this. - -One option is to surround the call to ``background_operation`` with a -``PreserveLoggingContext`` call. That will reset the logcontext before -starting ``background_operation`` (so the context restored when the deferred -completes will be the empty logcontext), and will restore the current -logcontext before continuing the foreground process: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - # start background_operation off in the empty logcontext, to - # avoid leaking the current context into the reactor. - with PreserveLoggingContext(): - background_operation() - - # this will now be logged against the request context - logger.debug("Request handling complete") - -Obviously that option means that the operations done in -``background_operation`` would be not be logged against a logcontext (though -that might be fixed by setting a different logcontext via a ``with -LoggingContext(...)`` in ``background_operation``). 
- -The second option is to use ``context.run_in_background``, which wraps a -function so that it doesn't reset the logcontext even when it returns an -incomplete deferred, and adds a callback to the returned deferred to reset the -logcontext. In other words, it turns a function that follows the Synapse rules -about logcontexts and Deferreds into one which behaves more like an external -function — the opposite operation to that described in the previous section. -It can be used like this: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - context.run_in_background(background_operation) - - # this will now be logged against the request context - logger.debug("Request handling complete") - -Passing synapse deferreds into third-party functions ----------------------------------------------------- - -A typical example of this is where we want to collect together two or more -deferred via ``defer.gatherResults``: - -.. code:: python - - d1 = operation1() - d2 = operation2() - d3 = defer.gatherResults([d1, d2]) - -This is really a variation of the fire-and-forget problem above, in that we are -firing off ``d1`` and ``d2`` without yielding on them. The difference -is that we now have third-party code attached to their callbacks. Anyway either -technique given in the `Fire-and-forget`_ section will work. - -Of course, the new Deferred returned by ``gatherResults`` needs to be wrapped -in order to make it follow the logcontext rules before we can yield it, as -described in `Where you create a new Deferred, make it follow the rules`_. - -So, option one: reset the logcontext before starting the operations to be -gathered: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - with PreserveLoggingContext(): - d1 = operation1() - d2 = operation2() - result = yield defer.gatherResults([d1, d2]) - -In this case particularly, though, option two, of using -``context.preserve_fn`` almost certainly makes more sense, so that -``operation1`` and ``operation2`` are both logged against the original -logcontext. This looks like: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - d1 = context.preserve_fn(operation1)() - d2 = context.preserve_fn(operation2)() - - with PreserveLoggingContext(): - result = yield defer.gatherResults([d1, d2]) - - -Was all this really necessary? ------------------------------- - -The conventions used work fine for a linear flow where everything happens in -series via ``defer.inlineCallbacks`` and ``yield``, but are certainly tricky to -follow for any more exotic flows. It's hard not to wonder if we could have done -something else. - -We're not going to rewrite Synapse now, so the following is entirely of -academic interest, but I'd like to record some thoughts on an alternative -approach. - -I briefly prototyped some code following an alternative set of rules. I think -it would work, but I certainly didn't get as far as thinking how it would -interact with concepts as complicated as the cache descriptors. - -My alternative rules were: - -* functions always preserve the logcontext of their caller, whether or not they - are returning a Deferred. - -* Deferreds returned by synapse functions run their callbacks in the same - context as the function was orignally called in. - -The main point of this scheme is that everywhere that sets the logcontext is -responsible for clearing it before returning control to the reactor. 
- -So, for example, if you were the function which started a ``with -LoggingContext`` block, you wouldn't ``yield`` within it — instead you'd start -off the background process, and then leave the ``with`` block to wait for it: - -.. code:: python - - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - d = do_request_handling() - - def cb(r): - logger.debug("finished") - - d.addCallback(cb) - return d - -(in general, mixing ``with LoggingContext`` blocks and -``defer.inlineCallbacks`` in the same function leads to slighly -counter-intuitive code, under this scheme). - -Because we leave the original ``with`` block as soon as the Deferred is -returned (as opposed to waiting for it to be resolved, as we do today), the -logcontext is cleared before control passes back to the reactor; so if there is -some code within ``do_request_handling`` which needs to wait for a Deferred to -complete, there is no need for it to worry about clearing the logcontext before -doing so: - -.. code:: python - - def handle_request(): - r = do_some_stuff() - r.addCallback(do_some_more_stuff) - return r - -— and provided ``do_some_stuff`` follows the rules of returning a Deferred which -runs its callbacks in the original logcontext, all is happy. - -The business of a Deferred which runs its callbacks in the original logcontext -isn't hard to achieve — we have it today, in the shape of -``context._PreservingContextDeferred``: - -.. code:: python - - def do_some_stuff(): - deferred = do_some_io() - pcd = _PreservingContextDeferred(LoggingContext.current_context()) - deferred.chainDeferred(pcd) - return pcd - -It turns out that, thanks to the way that Deferreds chain together, we -automatically get the property of a context-preserving deferred with -``defer.inlineCallbacks``, provided the final Defered the function ``yields`` -on has that property. So we can just write: - -.. code:: python - - @defer.inlineCallbacks - def handle_request(): - yield do_some_stuff() - yield do_some_more_stuff() - -To conclude: I think this scheme would have worked equally well, with less -danger of messing it up, and probably made some more esoteric code easier to -write. But again — changing the conventions of the entire Synapse codebase is -not a sensible option for the marginal improvement offered. - - -A note on garbage-collection of Deferred chains ------------------------------------------------ - -It turns out that our logcontext rules do not play nicely with Deferred -chains which get orphaned and garbage-collected. - -Imagine we have some code that looks like this: - -.. code:: python - - listener_queue = [] - - def on_something_interesting(): - for d in listener_queue: - d.callback("foo") - - @defer.inlineCallbacks - def await_something_interesting(): - new_deferred = defer.Deferred() - listener_queue.append(new_deferred) - - with PreserveLoggingContext(): - yield new_deferred - -Obviously, the idea here is that we have a bunch of things which are waiting -for an event. (It's just an example of the problem here, but a relatively -common one.) - -Now let's imagine two further things happen. First of all, whatever was -waiting for the interesting thing goes away. (Perhaps the request times out, -or something *even more* interesting happens.) - -Secondly, let's suppose that we decide that the interesting thing is never -going to happen, and we reset the listener queue: - -.. 
code:: python - - def reset_listener_queue(): - listener_queue.clear() - -So, both ends of the deferred chain have now dropped their references, and the -deferred chain is now orphaned, and will be garbage-collected at some point. -Note that ``await_something_interesting`` is a generator function, and when -Python garbage-collects generator functions, it gives them a chance to clean -up by making the ``yield`` raise a ``GeneratorExit`` exception. In our case, -that means that the ``__exit__`` handler of ``PreserveLoggingContext`` will -carefully restore the request context, but there is now nothing waiting for -its return, so the request context is never cleared. - -To reiterate, this problem only arises when *both* ends of a deferred chain -are dropped. Dropping the the reference to a deferred you're supposed to be -calling is probably bad practice, so this doesn't actually happen too much. -Unfortunately, when it does happen, it will lead to leaked logcontexts which -are incredibly hard to track down. diff --git a/docs/media_repository.md b/docs/media_repository.md new file mode 100644 index 0000000000..1bf8f16f55 --- /dev/null +++ b/docs/media_repository.md @@ -0,0 +1,30 @@ +# Media Repository + +*Synapse implementation-specific details for the media repository* + +The media repository is where attachments and avatar photos are stored. +It stores attachment content and thumbnails for media uploaded by local users. +It caches attachment content and thumbnails for media uploaded by remote users. + +## Storage + +Each item of media is assigned a `media_id` when it is uploaded. +The `media_id` is a randomly chosen, URL safe 24 character string. + +Metadata such as the MIME type, upload time and length are stored in the +sqlite3 database indexed by `media_id`. + +Content is stored on the filesystem under a `"local_content"` directory. + +Thumbnails are stored under a `"local_thumbnails"` directory. + +The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under +`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width +`128` and height `96` and type `"image/jpeg"` is stored under +`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"` + +Remote content is cached under `"remote_content"` directory. Each item of +remote content is assigned a local `"filesystem_id"` to ensure that the +directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"` +is appropriate. Thumbnails for remote content are stored under +`"remote_thumbnails/server_name/..."` diff --git a/docs/media_repository.rst b/docs/media_repository.rst deleted file mode 100644 index 1037b5be63..0000000000 --- a/docs/media_repository.rst +++ /dev/null @@ -1,27 +0,0 @@ -Media Repository -================ - -*Synapse implementation-specific details for the media repository* - -The media repository is where attachments and avatar photos are stored. -It stores attachment content and thumbnails for media uploaded by local users. -It caches attachment content and thumbnails for media uploaded by remote users. - -Storage -------- - -Each item of media is assigned a ``media_id`` when it is uploaded. -The ``media_id`` is a randomly chosen, URL safe 24 character string. -Metadata such as the MIME type, upload time and length are stored in the -sqlite3 database indexed by ``media_id``. -Content is stored on the filesystem under a ``"local_content"`` directory. -Thumbnails are stored under a ``"local_thumbnails"`` directory. 
-The item with ``media_id`` ``"aabbccccccccdddddddddddd"`` is stored under -``"local_content/aa/bb/ccccccccdddddddddddd"``. Its thumbnail with width -``128`` and height ``96`` and type ``"image/jpeg"`` is stored under -``"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"`` -Remote content is cached under ``"remote_content"`` directory. Each item of -remote content is assigned a local "``filesystem_id``" to ensure that the -directory structure ``"remote_content/server_name/aa/bb/ccccccccdddddddddddd"`` -is appropriate. Thumbnails for remote content are stored under -``"remote_thumbnails/server_name/..."`` diff --git a/docs/metrics-howto.md b/docs/metrics-howto.md new file mode 100644 index 0000000000..32abb9f44e --- /dev/null +++ b/docs/metrics-howto.md @@ -0,0 +1,217 @@ +# How to monitor Synapse metrics using Prometheus + +1. Install Prometheus: + + Follow instructions at + + +1. Enable Synapse metrics: + + There are two methods of enabling metrics in Synapse. + + The first serves the metrics as a part of the usual web server and + can be enabled by adding the \"metrics\" resource to the existing + listener as such: + + resources: + - names: + - client + - metrics + + This provides a simple way of adding metrics to your Synapse + installation, and serves under `/_synapse/metrics`. If you do not + wish your metrics be publicly exposed, you will need to either + filter it out at your load balancer, or use the second method. + + The second method runs the metrics server on a different port, in a + different thread to Synapse. This can make it more resilient to + heavy load meaning metrics cannot be retrieved, and can be exposed + to just internal networks easier. The served metrics are available + over HTTP only, and will be available at `/`. + + Add a new listener to homeserver.yaml: + + listeners: + - type: metrics + port: 9000 + bind_addresses: + - '0.0.0.0' + + For both options, you will need to ensure that `enable_metrics` is + set to `True`. + +1. Restart Synapse. + +1. Add a Prometheus target for Synapse. + + It needs to set the `metrics_path` to a non-default value (under + `scrape_configs`): + + - job_name: "synapse" + metrics_path: "/_synapse/metrics" + static_configs: + - targets: ["my.server.here:port"] + + where `my.server.here` is the IP address of Synapse, and `port` is + the listener port configured with the `metrics` resource. + + If your prometheus is older than 1.5.2, you will need to replace + `static_configs` in the above with `target_groups`. + +1. Restart Prometheus. + +## Renaming of metrics & deprecation of old names in 1.2 + +Synapse 1.2 updates the Prometheus metrics to match the naming +convention of the upstream `prometheus_client`. The old names are +considered deprecated and will be removed in a future version of +Synapse. 
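+
+If you have dashboards or alerts that still refer to the old names, one
+possible stop-gap while you migrate is to re-publish the new metrics
+under their old names using Prometheus recording rules. The sketch below
+is illustrative only: it is not part of Synapse, it assumes a Prometheus
+2.x rule file, and the rule group name is made up; the old/new name
+pairs are taken from the table that follows.
+
+    groups:
+      - name: synapse_metric_name_compat    # hypothetical group name
+        rules:
+          # re-expose the new metric under its old, deprecated name
+          - record: process_cpu_seconds
+            expr: process_cpu_seconds_total
+          - record: synapse_federation_client_sent_transactions
+            expr: synapse_federation_client_sent_transactions_total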
+ +| New Name | Old Name | +| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- | +| python_gc_objects_collected_total | python_gc_objects_collected | +| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | +| python_gc_collections_total | python_gc_collections | +| process_cpu_seconds_total | process_cpu_seconds | +| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | +| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | +| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | +| synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | +| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | +| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | +| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | +| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | +| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | +| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | +| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | +| synapse_background_process_start_count_total | synapse_background_process_start_count | +| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | +| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | +| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | +| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | +| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | +| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | +| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | +| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | +| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | +| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | +| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | +| synapse_federation_server_received_edus_total | synapse_federation_server_received_edus | +| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | +| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | +| synapse_handler_presence_presence_updates_total | synapse_handler_presence_presence_updates | +| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | +| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | +| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | +| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | +| 
synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count | +| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total | +| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed | +| synapse_notifier_notified_events_total | synapse_notifier_notified_events | +| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter | +| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter | +| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed | +| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed | +| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed | +| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed | + +Removal of deprecated metrics & time based counters becoming histograms in 0.31.0 +--------------------------------------------------------------------------------- + +The duplicated metrics deprecated in Synapse 0.27.0 have been removed. + +All time duration-based metrics have been changed to be seconds. This +affects: + +| msec -> sec metrics | +| -------------------------------------- | +| python_gc_time | +| python_twisted_reactor_tick_time | +| synapse_storage_query_time | +| synapse_storage_schedule_time | +| synapse_storage_transaction_time | + +Several metrics have been changed to be histograms, which sort entries +into buckets and allow better analysis. The following metrics are now +histograms: + +| Altered metrics | +| ------------------------------------------------ | +| python_gc_time | +| python_twisted_reactor_pending_calls | +| python_twisted_reactor_tick_time | +| synapse_http_server_response_time_seconds | +| synapse_storage_query_time | +| synapse_storage_schedule_time | +| synapse_storage_transaction_time | + +Block and response metrics renamed for 0.27.0 +--------------------------------------------- + +Synapse 0.27.0 begins the process of rationalising the duplicate +`*:count` metrics reported for the resource tracking for code blocks and +HTTP requests. + +At the same time, the corresponding `*:total` metrics are being renamed, +as the `:total` suffix no longer makes sense in the absence of a +corresponding `:count` metric. + +To enable a graceful migration path, this release just adds new names +for the metrics being renamed. A future release will remove the old +ones. + +The following table shows the new metrics, and the old metrics which +they are replacing. 
+ +| New name | Old name | +| ------------------------------------------------------------- | ---------------------------------------------------------- | +| synapse_util_metrics_block_count | synapse_util_metrics_block_timer:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_ru_utime:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_ru_stime:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_db_txn_count:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_db_txn_duration:count | +| synapse_util_metrics_block_time_seconds | synapse_util_metrics_block_timer:total | +| synapse_util_metrics_block_ru_utime_seconds | synapse_util_metrics_block_ru_utime:total | +| synapse_util_metrics_block_ru_stime_seconds | synapse_util_metrics_block_ru_stime:total | +| synapse_util_metrics_block_db_txn_count | synapse_util_metrics_block_db_txn_count:total | +| synapse_util_metrics_block_db_txn_duration_seconds | synapse_util_metrics_block_db_txn_duration:total | +| synapse_http_server_response_count | synapse_http_server_requests | +| synapse_http_server_response_count | synapse_http_server_response_time:count | +| synapse_http_server_response_count | synapse_http_server_response_ru_utime:count | +| synapse_http_server_response_count | synapse_http_server_response_ru_stime:count | +| synapse_http_server_response_count | synapse_http_server_response_db_txn_count:count | +| synapse_http_server_response_count | synapse_http_server_response_db_txn_duration:count | +| synapse_http_server_response_time_seconds | synapse_http_server_response_time:total | +| synapse_http_server_response_ru_utime_seconds | synapse_http_server_response_ru_utime:total | +| synapse_http_server_response_ru_stime_seconds | synapse_http_server_response_ru_stime:total | +| synapse_http_server_response_db_txn_count | synapse_http_server_response_db_txn_count:total | +| synapse_http_server_response_db_txn_duration_seconds | synapse_http_server_response_db_txn_duration:total | + +Standard Metric Names +--------------------- + +As of synapse version 0.18.2, the format of the process-wide metrics has +been changed to fit prometheus standard naming conventions. Additionally +the units have been changed to seconds, from miliseconds. + +| New name | Old name | +| ---------------------------------------- | --------------------------------- | +| process_cpu_user_seconds_total | process_resource_utime / 1000 | +| process_cpu_system_seconds_total | process_resource_stime / 1000 | +| process_open_fds (no \'type\' label) | process_fds | + +The python-specific counts of garbage collector performance have been +renamed. + +| New name | Old name | +| -------------------------------- | -------------------------- | +| python_gc_time | reactor_gc_time | +| python_gc_unreachable_total | reactor_gc_unreachable | +| python_gc_counts | reactor_gc_counts | + +The twisted-specific reactor metrics have been renamed. + +| New name | Old name | +| -------------------------------------- | ----------------------- | +| python_twisted_reactor_pending_calls | reactor_pending_calls | +| python_twisted_reactor_tick_time | reactor_tick_time | diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst deleted file mode 100644 index 973641f3dc..0000000000 --- a/docs/metrics-howto.rst +++ /dev/null @@ -1,285 +0,0 @@ -How to monitor Synapse metrics using Prometheus -=============================================== - -1. 
Install Prometheus: - - Follow instructions at http://prometheus.io/docs/introduction/install/ - -2. Enable Synapse metrics: - - There are two methods of enabling metrics in Synapse. - - The first serves the metrics as a part of the usual web server and can be - enabled by adding the "metrics" resource to the existing listener as such:: - - resources: - - names: - - client - - metrics - - This provides a simple way of adding metrics to your Synapse installation, - and serves under ``/_synapse/metrics``. If you do not wish your metrics be - publicly exposed, you will need to either filter it out at your load - balancer, or use the second method. - - The second method runs the metrics server on a different port, in a - different thread to Synapse. This can make it more resilient to heavy load - meaning metrics cannot be retrieved, and can be exposed to just internal - networks easier. The served metrics are available over HTTP only, and will - be available at ``/``. - - Add a new listener to homeserver.yaml:: - - listeners: - - type: metrics - port: 9000 - bind_addresses: - - '0.0.0.0' - - For both options, you will need to ensure that ``enable_metrics`` is set to - ``True``. - - Restart Synapse. - -3. Add a Prometheus target for Synapse. - - It needs to set the ``metrics_path`` to a non-default value (under ``scrape_configs``):: - - - job_name: "synapse" - metrics_path: "/_synapse/metrics" - static_configs: - - targets: ["my.server.here:port"] - - where ``my.server.here`` is the IP address of Synapse, and ``port`` is the listener port - configured with the ``metrics`` resource. - - If your prometheus is older than 1.5.2, you will need to replace - ``static_configs`` in the above with ``target_groups``. - - Restart Prometheus. - - -Renaming of metrics & deprecation of old names in 1.2 ------------------------------------------------------ - -Synapse 1.2 updates the Prometheus metrics to match the naming convention of the -upstream ``prometheus_client``. The old names are considered deprecated and will -be removed in a future version of Synapse. 
- -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| New Name | Old Name | -+=============================================================================+=======================================================================+ -| python_gc_objects_collected_total | python_gc_objects_collected | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| python_gc_collections_total | python_gc_collections | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| process_cpu_seconds_total | process_cpu_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | 
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_start_count_total | synapse_background_process_start_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_server_received_edus_total | 
synapse_federation_server_received_edus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_presence_updates_total | synapse_handler_presence_presence_updates | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_notifier_notified_events_total | synapse_notifier_notified_events | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter | 
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ - - -Removal of deprecated metrics & time based counters becoming histograms in 0.31.0 ---------------------------------------------------------------------------------- - -The duplicated metrics deprecated in Synapse 0.27.0 have been removed. - -All time duration-based metrics have been changed to be seconds. This affects: - -+----------------------------------+ -| msec -> sec metrics | -+==================================+ -| python_gc_time | -+----------------------------------+ -| python_twisted_reactor_tick_time | -+----------------------------------+ -| synapse_storage_query_time | -+----------------------------------+ -| synapse_storage_schedule_time | -+----------------------------------+ -| synapse_storage_transaction_time | -+----------------------------------+ - -Several metrics have been changed to be histograms, which sort entries into -buckets and allow better analysis. The following metrics are now histograms: - -+-------------------------------------------+ -| Altered metrics | -+===========================================+ -| python_gc_time | -+-------------------------------------------+ -| python_twisted_reactor_pending_calls | -+-------------------------------------------+ -| python_twisted_reactor_tick_time | -+-------------------------------------------+ -| synapse_http_server_response_time_seconds | -+-------------------------------------------+ -| synapse_storage_query_time | -+-------------------------------------------+ -| synapse_storage_schedule_time | -+-------------------------------------------+ -| synapse_storage_transaction_time | -+-------------------------------------------+ - - -Block and response metrics renamed for 0.27.0 ---------------------------------------------- - -Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count`` -metrics reported for the resource tracking for code blocks and HTTP requests. - -At the same time, the corresponding ``*:total`` metrics are being renamed, as -the ``:total`` suffix no longer makes sense in the absence of a corresponding -``:count`` metric. - -To enable a graceful migration path, this release just adds new names for the -metrics being renamed. A future release will remove the old ones. - -The following table shows the new metrics, and the old metrics which they are -replacing. 
- -==================================================== =================================================== -New name Old name -==================================================== =================================================== -synapse_util_metrics_block_count synapse_util_metrics_block_timer:count -synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count -synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count -synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count -synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count - -synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total -synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total -synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total -synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total -synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total - -synapse_http_server_response_count synapse_http_server_requests -synapse_http_server_response_count synapse_http_server_response_time:count -synapse_http_server_response_count synapse_http_server_response_ru_utime:count -synapse_http_server_response_count synapse_http_server_response_ru_stime:count -synapse_http_server_response_count synapse_http_server_response_db_txn_count:count -synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count - -synapse_http_server_response_time_seconds synapse_http_server_response_time:total -synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total -synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total -synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total -synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total -==================================================== =================================================== - - -Standard Metric Names ---------------------- - -As of synapse version 0.18.2, the format of the process-wide metrics has been -changed to fit prometheus standard naming conventions. Additionally the units -have been changed to seconds, from miliseconds. - -================================== ============================= -New name Old name -================================== ============================= -process_cpu_user_seconds_total process_resource_utime / 1000 -process_cpu_system_seconds_total process_resource_stime / 1000 -process_open_fds (no 'type' label) process_fds -================================== ============================= - -The python-specific counts of garbage collector performance have been renamed. - -=========================== ====================== -New name Old name -=========================== ====================== -python_gc_time reactor_gc_time -python_gc_unreachable_total reactor_gc_unreachable -python_gc_counts reactor_gc_counts -=========================== ====================== - -The twisted-specific reactor metrics have been renamed. 
-
-==================================== =====================
-New name                             Old name
-==================================== =====================
-python_twisted_reactor_pending_calls reactor_pending_calls
-python_twisted_reactor_tick_time     reactor_tick_time
-==================================== =====================
diff --git a/docs/opentracing.md b/docs/opentracing.md
new file mode 100644
index 0000000000..4c7a56a5d7
--- /dev/null
+++ b/docs/opentracing.md
@@ -0,0 +1,93 @@
+# OpenTracing
+
+## Background
+
+OpenTracing is a semi-standard being adopted by a number of distributed
+tracing platforms. It is a common api for facilitating vendor-agnostic
+tracing instrumentation. That is, we can use the OpenTracing api and
+select one of a number of tracer implementations to do the heavy lifting
+in the background. Our current selected implementation is Jaeger.
+
+OpenTracing is a tool which gives an insight into the causal
+relationship of work done in and between servers. The servers each track
+events and report them to a centralised server - in Synapse's case:
+Jaeger. The basic unit used to represent events is the span. The span
+roughly represents a single piece of work that was done and the time at
+which it occurred. A span can have child spans, meaning that the work of
+the child had to be completed for the parent span to complete, or it can
+have follow-on spans which represent work that is undertaken as a result
+of the parent but is not depended on by the parent in order to
+finish.
+
+Since this is undertaken in a distributed environment a request to
+another server, such as an RPC or a simple GET, can be considered a span
+(a unit of work) for the local server. This causal link is what
+OpenTracing aims to capture and visualise. In order to do this, metadata
+about the local server's span, i.e. the 'span context', needs to be
+included with the request to the remote.
+
+It is up to the remote server to decide what it does with the spans it
+creates. This is called the sampling policy and it can be configured
+through Jaeger's settings.
+
+For OpenTracing concepts see
+<https://opentracing.io/docs/overview/what-is-tracing/>.
+
+For more information about Jaeger's implementation see
+<https://www.jaegertracing.io/docs/>
+
+## Setting up OpenTracing
+
+To receive OpenTracing spans, start up a Jaeger server. This can be done
+using docker like so:
+
+```sh
+docker run -d --name jaeger \
+  -p 6831:6831/udp \
+  -p 6832:6832/udp \
+  -p 5778:5778 \
+  -p 16686:16686 \
+  -p 14268:14268 \
+  jaegertracing/all-in-one:1.13
+```
+
+Latest documentation is probably at
+<https://www.jaegertracing.io/docs/1.13/getting-started/>
+
+## Enable OpenTracing in Synapse
+
+OpenTracing is not enabled by default. It must be enabled in the
+homeserver config by uncommenting the config options under `opentracing`
+as shown in the [sample config](./sample_config.yaml). For example:
+
+```yaml
+opentracing:
+    tracer_enabled: true
+    homeserver_whitelist:
+      - "mytrustedhomeserver.org"
+      - "*.myotherhomeservers.com"
+```
+
+## Homeserver whitelisting
+
+The homeserver whitelist is configured using regular expressions. A list
+of regular expressions can be given and their union will be compared
+when propagating any span contexts to another homeserver.
+
+Though it's mostly safe to send and receive span contexts to and from
+untrusted users since span contexts are usually opaque ids it can lead
+to two problems, namely:
+
+- If the span context is marked as sampled by the sending homeserver
+  the receiver will sample it. Therefore two homeservers with wildly
+  different sampling policies could incur higher sampling counts than
+  intended.
+- Sending servers can attach arbitrary data to spans, known as + 'baggage'. For safety this has been disabled in Synapse but that + doesn't prevent another server sending you baggage which will be + logged to OpenTracing's logs. + +## Configuring Jaeger + +Sampling strategies can be set as in this document: + diff --git a/docs/opentracing.rst b/docs/opentracing.rst deleted file mode 100644 index 6e98ab56ba..0000000000 --- a/docs/opentracing.rst +++ /dev/null @@ -1,123 +0,0 @@ -=========== -OpenTracing -=========== - -Background ----------- - -OpenTracing is a semi-standard being adopted by a number of distributed tracing -platforms. It is a common api for facilitating vendor-agnostic tracing -instrumentation. That is, we can use the OpenTracing api and select one of a -number of tracer implementations to do the heavy lifting in the background. -Our current selected implementation is Jaeger. - -OpenTracing is a tool which gives an insight into the causal relationship of -work done in and between servers. The servers each track events and report them -to a centralised server - in Synapse's case: Jaeger. The basic unit used to -represent events is the span. The span roughly represents a single piece of work -that was done and the time at which it occurred. A span can have child spans, -meaning that the work of the child had to be completed for the parent span to -complete, or it can have follow-on spans which represent work that is undertaken -as a result of the parent but is not depended on by the parent to in order to -finish. - -Since this is undertaken in a distributed environment a request to another -server, such as an RPC or a simple GET, can be considered a span (a unit or -work) for the local server. This causal link is what OpenTracing aims to -capture and visualise. In order to do this metadata about the local server's -span, i.e the 'span context', needs to be included with the request to the -remote. - -It is up to the remote server to decide what it does with the spans -it creates. This is called the sampling policy and it can be configured -through Jaeger's settings. - -For OpenTracing concepts see -https://opentracing.io/docs/overview/what-is-tracing/. - -For more information about Jaeger's implementation see -https://www.jaegertracing.io/docs/ - -===================== -Seting up OpenTracing -===================== - -To receive OpenTracing spans, start up a Jaeger server. This can be done -using docker like so: - -.. code-block:: bash - - docker run -d --name jaeger - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p 16686:16686 \ - -p 14268:14268 \ - jaegertracing/all-in-one:1.13 - -Latest documentation is probably at -https://www.jaegertracing.io/docs/1.13/getting-started/ - - -Enable OpenTracing in Synapse ------------------------------ - -OpenTracing is not enabled by default. It must be enabled in the homeserver -config by uncommenting the config options under ``opentracing`` as shown in -the `sample config <./sample_config.yaml>`_. For example: - -.. code-block:: yaml - - opentracing: - tracer_enabled: true - homeserver_whitelist: - - "mytrustedhomeserver.org" - - "*.myotherhomeservers.com" - -Homeserver whitelisting ------------------------ - -The homeserver whitelist is configured using regular expressions. A list of regular -expressions can be given and their union will be compared when propagating any -spans contexts to another homeserver. 
- -Though it's mostly safe to send and receive span contexts to and from -untrusted users since span contexts are usually opaque ids it can lead to -two problems, namely: - -- If the span context is marked as sampled by the sending homeserver the receiver will - sample it. Therefore two homeservers with wildly different sampling policies - could incur higher sampling counts than intended. -- Sending servers can attach arbitrary data to spans, known as 'baggage'. For safety this has been disabled in Synapse - but that doesn't prevent another server sending you baggage which will be logged - to OpenTracing's logs. - -========== -EDU FORMAT -========== - -EDUs can contain tracing data in their content. This is not specced but -it could be of interest for other homeservers. - -EDU format (if you're using jaeger): - -.. code-block:: json - - { - "edu_type": "type", - "content": { - "org.matrix.opentracing_context": { - "uber-trace-id": "fe57cf3e65083289" - } - } - } - -Though you don't have to use jaeger you must inject the span context into -`org.matrix.opentracing_context` using the opentracing `Format.TEXT_MAP` inject method. - -================== -Configuring Jaeger -================== - -Sampling strategies can be set as in this document: -https://www.jaegertracing.io/docs/1.13/sampling/ diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md new file mode 100644 index 0000000000..0db1a3804a --- /dev/null +++ b/docs/password_auth_providers.md @@ -0,0 +1,116 @@ +# Password auth provider modules + +Password auth providers offer a way for server administrators to +integrate their Synapse installation with an existing authentication +system. + +A password auth provider is a Python class which is dynamically loaded +into Synapse, and provides a number of methods by which it can integrate +with the authentication system. + +This document serves as a reference for those looking to implement their +own password auth providers. + +## Required methods + +Password auth provider classes must provide the following methods: + +*class* `SomeProvider.parse_config`(*config*) + +> This method is passed the `config` object for this module from the +> homeserver configuration file. +> +> It should perform any appropriate sanity checks on the provided +> configuration, and return an object which is then passed into +> `__init__`. + +*class* `SomeProvider`(*config*, *account_handler*) + +> The constructor is passed the config object returned by +> `parse_config`, and a `synapse.module_api.ModuleApi` object which +> allows the password provider to check if accounts exist and/or create +> new ones. + +## Optional methods + +Password auth provider classes may optionally provide the following +methods. + +*class* `SomeProvider.get_db_schema_files`() + +> This method, if implemented, should return an Iterable of +> `(name, stream)` pairs of database schema files. Each file is applied +> in turn at initialisation, and a record is then made in the database +> so that it is not re-applied on the next start. + +`someprovider.get_supported_login_types`() + +> This method, if implemented, should return a `dict` mapping from a +> login type identifier (such as `m.login.password`) to an iterable +> giving the fields which must be provided by the user in the submission +> to the `/login` api. These fields are passed in the `login_dict` +> dictionary to `check_auth`. 
+
+> For example, if a password auth provider wants to implement a custom
+> login type of `com.example.custom_login`, where the client is expected
+> to pass the fields `secret1` and `secret2`, the provider should
+> implement this method and return the following dict:
+>
+>     {"com.example.custom_login": ("secret1", "secret2")}
+
+`someprovider.check_auth`(*username*, *login_type*, *login_dict*)
+
+> This method is the one that does the real work. If implemented, it
+> will be called for each login attempt where the login type matches one
+> of the keys returned by `get_supported_login_types`.
+>
+> It is passed the (possibly UNqualified) `user` provided by the client,
+> the login type, and a dictionary of login secrets passed by the
+> client.
+>
+> The method should return a Twisted `Deferred` object, which resolves
+> to the canonical `@localpart:domain` user id if authentication is
+> successful, and `None` if not.
+>
+> Alternatively, the `Deferred` can resolve to a `(str, func)` tuple, in
+> which case the second field is a callback which will be called with
+> the result from the `/login` call (including `access_token`,
+> `device_id`, etc.)
+
+`someprovider.check_3pid_auth`(*medium*, *address*, *password*)
+
+> This method, if implemented, is called when a user attempts to
+> register or log in with a third party identifier, such as email. It is
+> passed the medium (ex. "email"), an address (ex.
+> "jdoe@example.com") and the user's password.
+>
+> The method should return a Twisted `Deferred` object, which resolves
+> to a `str` containing the user's (canonical) User ID if
+> authentication was successful, and `None` if not.
+>
+> As with `check_auth`, the `Deferred` may alternatively resolve to a
+> `(user_id, callback)` tuple.
+
+`someprovider.check_password`(*user_id*, *password*)
+
+> This method provides a simpler interface than
+> `get_supported_login_types` and `check_auth` for password auth
+> providers that just want to provide a mechanism for validating
+> `m.login.password` logins.
+>
+> If implemented, it will be called to check logins with an
+> `m.login.password` login type. It is passed a qualified
+> `@localpart:domain` user id, and the password provided by the user.
+>
+> The method should return a Twisted `Deferred` object, which resolves
+> to `True` if authentication is successful, and `False` if not.
+
+`someprovider.on_logged_out`(*user_id*, *device_id*, *access_token*)
+
+> This method, if implemented, is called when a user logs out. It is
+> passed the qualified user ID, the ID of the deactivated device (if
+> any: access tokens are occasionally created without an associated
+> device ID), and the (now deactivated) access token.
+>
+> It may return a Twisted `Deferred` object; the logout request will
+> wait for the deferred to complete but the result is ignored.
diff --git a/docs/password_auth_providers.rst b/docs/password_auth_providers.rst
deleted file mode 100644
index 6149ba7458..0000000000
--- a/docs/password_auth_providers.rst
+++ /dev/null
@@ -1,113 +0,0 @@
-Password auth provider modules
-==============================
-
-Password auth providers offer a way for server administrators to integrate
-their Synapse installation with an existing authentication system.
-
-A password auth provider is a Python class which is dynamically loaded into
-Synapse, and provides a number of methods by which it can integrate with the
-authentication system.
-
-This document serves as a reference for those looking to implement their own
-password auth providers.
- -Required methods ----------------- - -Password auth provider classes must provide the following methods: - -*class* ``SomeProvider.parse_config``\(*config*) - - This method is passed the ``config`` object for this module from the - homeserver configuration file. - - It should perform any appropriate sanity checks on the provided - configuration, and return an object which is then passed into ``__init__``. - -*class* ``SomeProvider``\(*config*, *account_handler*) - - The constructor is passed the config object returned by ``parse_config``, - and a ``synapse.module_api.ModuleApi`` object which allows the - password provider to check if accounts exist and/or create new ones. - -Optional methods ----------------- - -Password auth provider classes may optionally provide the following methods. - -*class* ``SomeProvider.get_db_schema_files``\() - - This method, if implemented, should return an Iterable of ``(name, - stream)`` pairs of database schema files. Each file is applied in turn at - initialisation, and a record is then made in the database so that it is - not re-applied on the next start. - -``someprovider.get_supported_login_types``\() - - This method, if implemented, should return a ``dict`` mapping from a login - type identifier (such as ``m.login.password``) to an iterable giving the - fields which must be provided by the user in the submission to the - ``/login`` api. These fields are passed in the ``login_dict`` dictionary - to ``check_auth``. - - For example, if a password auth provider wants to implement a custom login - type of ``com.example.custom_login``, where the client is expected to pass - the fields ``secret1`` and ``secret2``, the provider should implement this - method and return the following dict:: - - {"com.example.custom_login": ("secret1", "secret2")} - -``someprovider.check_auth``\(*username*, *login_type*, *login_dict*) - - This method is the one that does the real work. If implemented, it will be - called for each login attempt where the login type matches one of the keys - returned by ``get_supported_login_types``. - - It is passed the (possibly UNqualified) ``user`` provided by the client, - the login type, and a dictionary of login secrets passed by the client. - - The method should return a Twisted ``Deferred`` object, which resolves to - the canonical ``@localpart:domain`` user id if authentication is successful, - and ``None`` if not. - - Alternatively, the ``Deferred`` can resolve to a ``(str, func)`` tuple, in - which case the second field is a callback which will be called with the - result from the ``/login`` call (including ``access_token``, ``device_id``, - etc.) - -``someprovider.check_3pid_auth``\(*medium*, *address*, *password*) - - This method, if implemented, is called when a user attempts to register or - log in with a third party identifier, such as email. It is passed the - medium (ex. "email"), an address (ex. "jdoe@example.com") and the user's - password. - - The method should return a Twisted ``Deferred`` object, which resolves to - a ``str`` containing the user's (canonical) User ID if authentication was - successful, and ``None`` if not. - - As with ``check_auth``, the ``Deferred`` may alternatively resolve to a - ``(user_id, callback)`` tuple. - -``someprovider.check_password``\(*user_id*, *password*) - - This method provides a simpler interface than ``get_supported_login_types`` - and ``check_auth`` for password auth providers that just want to provide a - mechanism for validating ``m.login.password`` logins. 
-
-  Iif implemented, it will be called to check logins with an
-  ``m.login.password`` login type. It is passed a qualified
-  ``@localpart:domain`` user id, and the password provided by the user.
-
-  The method should return a Twisted ``Deferred`` object, which resolves to
-  ``True`` if authentication is successful, and ``False`` if not.
-
-``someprovider.on_logged_out``\(*user_id*, *device_id*, *access_token*)
-
-  This method, if implemented, is called when a user logs out. It is passed
-  the qualified user ID, the ID of the deactivated device (if any: access
-  tokens are occasionally created without an associated device ID), and the
-  (now deactivated) access token.
-
-  It may return a Twisted ``Deferred`` object; the logout request will wait
-  for the deferred to complete but the result is ignored.
diff --git a/docs/postgres.md b/docs/postgres.md
new file mode 100644
index 0000000000..29cf762858
--- /dev/null
+++ b/docs/postgres.md
@@ -0,0 +1,164 @@
+# Using Postgres
+
+Postgres version 9.5 or later is known to work.
+
+## Install postgres client libraries
+
+Synapse will require the python postgres client library in order to
+connect to a postgres database.
+
+- If you are using the [matrix.org debian/ubuntu
+  packages](../INSTALL.md#matrixorg-packages), the necessary python
+  library will already be installed, but you will need to ensure the
+  low-level postgres library is installed, which you can do with
+  `apt install libpq5`.
+- For other pre-built packages, please consult the documentation from
+  the relevant package.
+- If you installed synapse [in a
+  virtualenv](../INSTALL.md#installing-from-source), you can install
+  the library with:
+
+      ~/synapse/env/bin/pip install matrix-synapse[postgres]
+
+  (substituting the path to your virtualenv for `~/synapse/env`, if
+  you used a different path). You will require the postgres
+  development files. These are in the `libpq-dev` package on
+  Debian-derived distributions.
+
+## Set up database
+
+Assuming your PostgreSQL database user is called `postgres`, create a
+user `synapse_user` with:
+
+    su - postgres
+    createuser --pwprompt synapse_user
+
+Before you can authenticate with the `synapse_user`, you must create a
+database that it can access. To create a database, first connect to the
+database with your database user:
+
+    su - postgres
+    psql
+
+and then run:
+
+    CREATE DATABASE synapse
+     ENCODING 'UTF8'
+     LC_COLLATE='C'
+     LC_CTYPE='C'
+     template=template0
+     OWNER synapse_user;
+
+This would create an appropriate database named `synapse` owned by the
+`synapse_user` user (which must already have been created as above).
+
+Note that the PostgreSQL database *must* have the correct encoding set
+(as shown above), otherwise it will not be able to store UTF8 strings.
+
+You may need to enable password authentication so `synapse_user` can
+connect to the database. See
+<https://www.postgresql.org/docs/11/auth-pg-hba-conf.html>.
+
+## Tuning Postgres
+
+The default settings should be fine for most deployments. For larger
+scale deployments tuning some of the settings is recommended, details of
+which can be found at
+<https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server>.
+
+In particular, we've found tuning the following values helpful for
+performance:
+
+- `shared_buffers`
+- `effective_cache_size`
+- `work_mem`
+- `maintenance_work_mem`
+- `autovacuum_work_mem`
+
+Note that the appropriate values for those fields depend on the amount
+of free memory the database host has available.
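+
+As a purely illustrative sketch (not a recommendation; the right values
+depend entirely on the memory and workload of your database host), a tuned
+`postgresql.conf` for a small dedicated database server might contain
+something like:
+
+    # postgresql.conf -- example values only; size these to your own host
+    shared_buffers = 1GB            # typically around 25% of the host's RAM
+    effective_cache_size = 3GB      # roughly the RAM left over for the OS page cache
+    work_mem = 16MB                 # allocated per sort/hash operation, so keep modest
+    maintenance_work_mem = 256MB    # used by VACUUM and index builds
+    autovacuum_work_mem = 256MB     # cap on memory used by each autovacuum worker
+
+Note that `shared_buffers` only takes effect after a full PostgreSQL
+restart, while the other settings above can be applied with a reload.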
+ +## Synapse config + +When you are ready to start using PostgreSQL, edit the `database` +section in your config file to match the following lines: + + database: + name: psycopg2 + args: + user: + password: + database: + host: + cp_min: 5 + cp_max: 10 + +All key, values in `args` are passed to the `psycopg2.connect(..)` +function, except keys beginning with `cp_`, which are consumed by the +twisted adbapi connection pool. + +## Porting from SQLite + +### Overview + +The script `synapse_port_db` allows porting an existing synapse server +backed by SQLite to using PostgreSQL. This is done in as a two phase +process: + +1. Copy the existing SQLite database to a separate location (while the + server is down) and running the port script against that offline + database. +2. Shut down the server. Rerun the port script to port any data that + has come in since taking the first snapshot. Restart server against + the PostgreSQL database. + +The port script is designed to be run repeatedly against newer snapshots +of the SQLite database file. This makes it safe to repeat step 1 if +there was a delay between taking the previous snapshot and being ready +to do step 2. + +It is safe to at any time kill the port script and restart it. + +### Using the port script + +Firstly, shut down the currently running synapse server and copy its +database file (typically `homeserver.db`) to another location. Once the +copy is complete, restart synapse. For instance: + + ./synctl stop + cp homeserver.db homeserver.db.snapshot + ./synctl start + +Copy the old config file into a new config file: + + cp homeserver.yaml homeserver-postgres.yaml + +Edit the database section as described in the section *Synapse config* +above and with the SQLite snapshot located at `homeserver.db.snapshot` +simply run: + + synapse_port_db --sqlite-database homeserver.db.snapshot \ + --postgres-config homeserver-postgres.yaml + +The flag `--curses` displays a coloured curses progress UI. + +If the script took a long time to complete, or time has otherwise passed +since the original snapshot was taken, repeat the previous steps with a +newer snapshot. + +To complete the conversion shut down the synapse server and run the port +script one last time, e.g. if the SQLite database is at `homeserver.db` +run: + + synapse_port_db --sqlite-database homeserver.db \ + --postgres-config homeserver-postgres.yaml + +Once that has completed, change the synapse config to point at the +PostgreSQL database configuration file `homeserver-postgres.yaml`: + + ./synctl stop + mv homeserver.yaml homeserver-old-sqlite.yaml + mv homeserver-postgres.yaml homeserver.yaml + ./synctl start + +Synapse should now be running against PostgreSQL. diff --git a/docs/postgres.rst b/docs/postgres.rst deleted file mode 100644 index e08a5116b9..0000000000 --- a/docs/postgres.rst +++ /dev/null @@ -1,166 +0,0 @@ -Using Postgres --------------- - -Postgres version 9.5 or later is known to work. - -Install postgres client libraries -================================= - -Synapse will require the python postgres client library in order to connect to -a postgres database. - -* If you are using the `matrix.org debian/ubuntu - packages <../INSTALL.md#matrixorg-packages>`_, - the necessary python library will already be installed, but you will need to - ensure the low-level postgres library is installed, which you can do with - ``apt install libpq5``. - -* For other pre-built packages, please consult the documentation from the - relevant package. 
- -* If you installed synapse `in a virtualenv - <../INSTALL.md#installing-from-source>`_, you can install the library with:: - - ~/synapse/env/bin/pip install matrix-synapse[postgres] - - (substituting the path to your virtualenv for ``~/synapse/env``, if you used a - different path). You will require the postgres development files. These are in - the ``libpq-dev`` package on Debian-derived distributions. - -Set up database -=============== - -Assuming your PostgreSQL database user is called ``postgres``, create a user -``synapse_user`` with:: - - su - postgres - createuser --pwprompt synapse_user - -Before you can authenticate with the ``synapse_user``, you must create a -database that it can access. To create a database, first connect to the database -with your database user:: - - su - postgres - psql - -and then run:: - - CREATE DATABASE synapse - ENCODING 'UTF8' - LC_COLLATE='C' - LC_CTYPE='C' - template=template0 - OWNER synapse_user; - -This would create an appropriate database named ``synapse`` owned by the -``synapse_user`` user (which must already have been created as above). - -Note that the PostgreSQL database *must* have the correct encoding set (as -shown above), otherwise it will not be able to store UTF8 strings. - -You may need to enable password authentication so ``synapse_user`` can connect -to the database. See https://www.postgresql.org/docs/11/auth-pg-hba-conf.html. - -Tuning Postgres -=============== - -The default settings should be fine for most deployments. For larger scale -deployments tuning some of the settings is recommended, details of which can be -found at https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server. - -In particular, we've found tuning the following values helpful for performance: - -- ``shared_buffers`` -- ``effective_cache_size`` -- ``work_mem`` -- ``maintenance_work_mem`` -- ``autovacuum_work_mem`` - -Note that the appropriate values for those fields depend on the amount of free -memory the database host has available. - -Synapse config -============== - -When you are ready to start using PostgreSQL, edit the ``database`` section in -your config file to match the following lines:: - - database: - name: psycopg2 - args: - user: - password: - database: - host: - cp_min: 5 - cp_max: 10 - -All key, values in ``args`` are passed to the ``psycopg2.connect(..)`` -function, except keys beginning with ``cp_``, which are consumed by the twisted -adbapi connection pool. - - -Porting from SQLite -=================== - -Overview -~~~~~~~~ - -The script ``synapse_port_db`` allows porting an existing synapse server -backed by SQLite to using PostgreSQL. This is done in as a two phase process: - -1. Copy the existing SQLite database to a separate location (while the server - is down) and running the port script against that offline database. -2. Shut down the server. Rerun the port script to port any data that has come - in since taking the first snapshot. Restart server against the PostgreSQL - database. - -The port script is designed to be run repeatedly against newer snapshots of the -SQLite database file. This makes it safe to repeat step 1 if there was a delay -between taking the previous snapshot and being ready to do step 2. - -It is safe to at any time kill the port script and restart it. - -Using the port script -~~~~~~~~~~~~~~~~~~~~~ - -Firstly, shut down the currently running synapse server and copy its database -file (typically ``homeserver.db``) to another location. Once the copy is -complete, restart synapse. 
For instance:: - - ./synctl stop - cp homeserver.db homeserver.db.snapshot - ./synctl start - -Copy the old config file into a new config file:: - - cp homeserver.yaml homeserver-postgres.yaml - -Edit the database section as described in the section *Synapse config* above -and with the SQLite snapshot located at ``homeserver.db.snapshot`` simply run:: - - synapse_port_db --sqlite-database homeserver.db.snapshot \ - --postgres-config homeserver-postgres.yaml - -The flag ``--curses`` displays a coloured curses progress UI. - -If the script took a long time to complete, or time has otherwise passed since -the original snapshot was taken, repeat the previous steps with a newer -snapshot. - -To complete the conversion shut down the synapse server and run the port -script one last time, e.g. if the SQLite database is at ``homeserver.db`` -run:: - - synapse_port_db --sqlite-database homeserver.db \ - --postgres-config homeserver-postgres.yaml - -Once that has completed, change the synapse config to point at the PostgreSQL -database configuration file ``homeserver-postgres.yaml``:: - - ./synctl stop - mv homeserver.yaml homeserver-old-sqlite.yaml - mv homeserver-postgres.yaml homeserver.yaml - ./synctl start - -Synapse should now be running against PostgreSQL. diff --git a/docs/replication.md b/docs/replication.md new file mode 100644 index 0000000000..ed88233157 --- /dev/null +++ b/docs/replication.md @@ -0,0 +1,37 @@ +# Replication Architecture + +## Motivation + +We'd like to be able to split some of the work that synapse does into +multiple python processes. In theory multiple synapse processes could +share a single postgresql database and we\'d scale up by running more +synapse processes. However much of synapse assumes that only one process +is interacting with the database, both for assigning unique identifiers +when inserting into tables, notifying components about new updates, and +for invalidating its caches. + +So running multiple copies of the current code isn't an option. One way +to run multiple processes would be to have a single writer process and +multiple reader processes connected to the same database. In order to do +this we'd need a way for the reader process to invalidate its in-memory +caches when an update happens on the writer. One way to do this is for +the writer to present an append-only log of updates which the readers +can consume to invalidate their caches and to push updates to listening +clients or pushers. + +Synapse already stores much of its data as an append-only log so that it +can correctly respond to `/sync` requests so the amount of code changes +needed to expose the append-only log to the readers should be fairly +minimal. + +## Architecture + +### The Replication Protocol + +See [tcp_replication.md](tcp_replication.md) + +### The Slaved DataStore + +There are read-only version of the synapse storage layer in +`synapse/replication/slave/storage` that use the response of the +replication API to invalidate their caches. diff --git a/docs/replication.rst b/docs/replication.rst deleted file mode 100644 index 310abb3488..0000000000 --- a/docs/replication.rst +++ /dev/null @@ -1,40 +0,0 @@ -Replication Architecture -======================== - -Motivation ----------- - -We'd like to be able to split some of the work that synapse does into multiple -python processes. In theory multiple synapse processes could share a single -postgresql database and we'd scale up by running more synapse processes. 
-However much of synapse assumes that only one process is interacting with the -database, both for assigning unique identifiers when inserting into tables, -notifying components about new updates, and for invalidating its caches. - -So running multiple copies of the current code isn't an option. One way to -run multiple processes would be to have a single writer process and multiple -reader processes connected to the same database. In order to do this we'd need -a way for the reader process to invalidate its in-memory caches when an update -happens on the writer. One way to do this is for the writer to present an -append-only log of updates which the readers can consume to invalidate their -caches and to push updates to listening clients or pushers. - -Synapse already stores much of its data as an append-only log so that it can -correctly respond to /sync requests so the amount of code changes needed to -expose the append-only log to the readers should be fairly minimal. - -Architecture ------------- - -The Replication Protocol -~~~~~~~~~~~~~~~~~~~~~~~~ - -See ``tcp_replication.rst`` - - -The Slaved DataStore -~~~~~~~~~~~~~~~~~~~~ - -There are read-only version of the synapse storage layer in -``synapse/replication/slave/storage`` that use the response of the replication -API to invalidate their caches. diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md new file mode 100644 index 0000000000..dcfc5c64aa --- /dev/null +++ b/docs/reverse_proxy.md @@ -0,0 +1,123 @@ +# Using a reverse proxy with Synapse + +It is recommended to put a reverse proxy such as +[nginx](https://nginx.org/en/docs/http/ngx_http_proxy_module.html), +[Apache](https://httpd.apache.org/docs/current/mod/mod_proxy_http.html), +[Caddy](https://caddyserver.com/docs/proxy) or +[HAProxy](https://www.haproxy.org/) in front of Synapse. One advantage +of doing so is that it means that you can expose the default https port +(443) to Matrix clients without needing to run Synapse with root +privileges. + +> **NOTE**: Your reverse proxy must not `canonicalise` or `normalise` +the requested URI in any way (for example, by decoding `%xx` escapes). +Beware that Apache *will* canonicalise URIs unless you specifify +`nocanon`. + +When setting up a reverse proxy, remember that Matrix clients and other +Matrix servers do not necessarily need to connect to your server via the +same server name or port. Indeed, clients will use port 443 by default, +whereas servers default to port 8448. Where these are different, we +refer to the 'client port' and the \'federation port\'. See [Setting +up federation](federate.md) for more details of the algorithm used for +federation connections. + +Let's assume that we expect clients to connect to our server at +`https://matrix.example.com`, and other servers to connect at +`https://example.com:8448`. The following sections detail the configuration of +the reverse proxy and the homeserver. + +## Webserver configuration examples + +> **NOTE**: You only need one of these. 
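Once one of the webserver configurations below is in place, a quick sanity check is to hit an unauthenticated endpoint through each port. The snippet below is an illustrative sketch only (it is not part of the official setup steps): it assumes the example hostnames used above and the third-party Python `requests` library.

```python
# Check that both the client port and the federation port are reachable
# through the reverse proxy. The hostnames follow the example above and are
# assumptions; substitute your own.
import requests

client_resp = requests.get("https://matrix.example.com/_matrix/client/versions")
print("client port:", client_resp.status_code, client_resp.json().get("versions"))

fed_resp = requests.get("https://example.com:8448/_matrix/key/v2/server")
print("federation port:", fed_resp.status_code)
```

Both requests should return a 200 once Synapse is running behind the proxy; anything else usually points at a routing or TLS problem in the proxy configuration rather than in Synapse itself.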
+ +### nginx + + server { + listen 443 ssl; + listen [::]:443 ssl; + server_name matrix.example.com; + + location /_matrix { + proxy_pass http://localhost:8008; + proxy_set_header X-Forwarded-For $remote_addr; + } + } + + server { + listen 8448 ssl default_server; + listen [::]:8448 ssl default_server; + server_name example.com; + + location / { + proxy_pass http://localhost:8008; + proxy_set_header X-Forwarded-For $remote_addr; + } + } + +> **NOTE**: Do not add a `/` after the port in `proxy_pass`, otherwise nginx will +canonicalise/normalise the URI. + +### Caddy + + matrix.example.com { + proxy /_matrix http://localhost:8008 { + transparent + } + } + + example.com:8448 { + proxy / http://localhost:8008 { + transparent + } + } + +### Apache + + + SSLEngine on + ServerName matrix.example.com; + + AllowEncodedSlashes NoDecode + ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon + ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix + + + + SSLEngine on + ServerName example.com; + + AllowEncodedSlashes NoDecode + ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon + ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix + + +> **NOTE**: ensure the `nocanon` options are included. + +### HAProxy + + frontend https + bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 + + # Matrix client traffic + acl matrix-host hdr(host) -i matrix.example.com + acl matrix-path path_beg /_matrix + + use_backend matrix if matrix-host matrix-path + + frontend matrix-federation + bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 + default_backend matrix + + backend matrix + server matrix 127.0.0.1:8008 + +## Homeserver Configuration + +You will also want to set `bind_addresses: ['127.0.0.1']` and +`x_forwarded: true` for port 8008 in `homeserver.yaml` to ensure that +client IP addresses are recorded correctly. + +Having done so, you can then use `https://matrix.example.com` (instead +of `https://matrix.example.com:8448`) as the "Custom server" when +connecting to Synapse from a client. diff --git a/docs/reverse_proxy.rst b/docs/reverse_proxy.rst deleted file mode 100644 index 4b640ffc4f..0000000000 --- a/docs/reverse_proxy.rst +++ /dev/null @@ -1,112 +0,0 @@ -Using a reverse proxy with Synapse -================================== - -It is recommended to put a reverse proxy such as -`nginx `_, -`Apache `_, -`Caddy `_ or -`HAProxy `_ in front of Synapse. One advantage of -doing so is that it means that you can expose the default https port (443) to -Matrix clients without needing to run Synapse with root privileges. - -**NOTE**: Your reverse proxy must not 'canonicalise' or 'normalise' the -requested URI in any way (for example, by decoding ``%xx`` escapes). Beware -that Apache *will* canonicalise URIs unless you specifify ``nocanon``. - -When setting up a reverse proxy, remember that Matrix clients and other Matrix -servers do not necessarily need to connect to your server via the same server -name or port. Indeed, clients will use port 443 by default, whereas servers -default to port 8448. Where these are different, we refer to the 'client port' -and the 'federation port'. See `Setting up federation -`_ for more details of the algorithm used for -federation connections. - -Let's assume that we expect clients to connect to our server at -``https://matrix.example.com``, and other servers to connect at -``https://example.com:8448``. 
Here are some example configurations: - -* nginx:: - - server { - listen 443 ssl; - listen [::]:443 ssl; - server_name matrix.example.com; - - location /_matrix { - proxy_pass http://localhost:8008; - proxy_set_header X-Forwarded-For $remote_addr; - } - } - - server { - listen 8448 ssl default_server; - listen [::]:8448 ssl default_server; - server_name example.com; - - location / { - proxy_pass http://localhost:8008; - proxy_set_header X-Forwarded-For $remote_addr; - } - } - - Do not add a `/` after the port in `proxy_pass`, otherwise nginx will canonicalise/normalise the URI. - -* Caddy:: - - matrix.example.com { - proxy /_matrix http://localhost:8008 { - transparent - } - } - - example.com:8448 { - proxy / http://localhost:8008 { - transparent - } - } - -* Apache (note the ``nocanon`` options here!):: - - - SSLEngine on - ServerName matrix.example.com; - - AllowEncodedSlashes NoDecode - ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon - ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix - - - - SSLEngine on - ServerName example.com; - - AllowEncodedSlashes NoDecode - ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon - ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix - - -* HAProxy:: - - frontend https - bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 - - # Matrix client traffic - acl matrix-host hdr(host) -i matrix.example.com - acl matrix-path path_beg /_matrix - - use_backend matrix if matrix-host matrix-path - - frontend matrix-federation - bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 - default_backend matrix - - backend matrix - server matrix 127.0.0.1:8008 - -You will also want to set ``bind_addresses: ['127.0.0.1']`` and ``x_forwarded: true`` -for port 8008 in ``homeserver.yaml`` to ensure that client IP addresses are -recorded correctly. - -Having done so, you can then use ``https://matrix.example.com`` (instead of -``https://matrix.example.com:8448``) as the "Custom server" when connecting to -Synapse from a client. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index dd4e2d5ebd..d5a8d24c2b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -136,8 +136,8 @@ federation_ip_range_blacklist: # # type: the type of listener. Normally 'http', but other valid options are: # 'manhole' (see docs/manhole.md), -# 'metrics' (see docs/metrics-howto.rst), -# 'replication' (see docs/workers.rst). +# 'metrics' (see docs/metrics-howto.md), +# 'replication' (see docs/workers.md). # # tls: set to true to enable TLS for this listener. Will use the TLS # key/cert specified in tls_private_key_path / tls_certificate_path. @@ -172,12 +172,12 @@ federation_ip_range_blacklist: # # media: the media API (/_matrix/media). # -# metrics: the metrics interface. See docs/metrics-howto.rst. +# metrics: the metrics interface. See docs/metrics-howto.md. # # openid: OpenID authentication. # # replication: the HTTP replication API (/_synapse/replication). See -# docs/workers.rst. +# docs/workers.md. # # static: static resources under synapse/static (/_matrix/static). (Mostly # useful for 'fallback authentication'.) @@ -201,7 +201,7 @@ listeners: # that unwraps TLS. # # If you plan to use a reverse proxy, please see - # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md. 
  #
  - port: 8008
    tls: false
@@ -1520,7 +1520,7 @@ opentracing:
     #enabled: true

     # The list of homeservers we wish to send and receive span contexts and span baggage.
-    # See docs/opentracing.rst
+    # See docs/opentracing.md
     # This is a list of regexes which are matched against the server_name of the
     # homeserver.
     #
diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md
new file mode 100644
index 0000000000..e099d8a87b
--- /dev/null
+++ b/docs/tcp_replication.md
@@ -0,0 +1,249 @@
+# TCP Replication
+
+## Motivation
+
+Previously the workers used an HTTP long poll mechanism to get updates
+from the master, which had the problem of causing a lot of duplicate
+work on the server. This TCP protocol replaces those APIs with the aim
+of increased efficiency.
+
+## Overview
+
+The protocol is based on fire-and-forget, line-based commands. An
+example flow would be (where '>' indicates master to worker and
+'<' worker to master flows):
+
+    > SERVER example.com
+    < REPLICATE events 53
+    > RDATA events 54 ["$foo1:bar.com", ...]
+    > RDATA events 55 ["$foo4:bar.com", ...]
+
+The example shows the server accepting a new connection and sending its
+identity with the `SERVER` command, followed by the client asking to
+subscribe to the `events` stream from the token `53`. The server then
+periodically sends `RDATA` commands which have the format
+`RDATA <stream_name> <token> <row>`, where the format of `<row>` is
+defined by the individual streams.
+
+Error reporting happens by either the client or server sending an ERROR
+command, and usually the connection will be closed.
+
+Since the protocol is a simple line-based one, it's possible to manually
+connect to the server using a tool like netcat. A few things should be
+noted when manually using the protocol:
+
+- When subscribing to a stream using `REPLICATE`, the special token
+  `NOW` can be used to get all future updates. The special stream name
+  `ALL` can be used with `NOW` to subscribe to all available streams.
+- The federation stream is only available if federation sending has
+  been disabled on the main process.
+- The server will only time out connections that have sent a `PING`
+  command. If a ping is sent then the connection will be closed if no
+  further commands are received within 15s. Both the client and
+  server protocol implementations will send an initial PING on
+  connection and ensure at least one command every 5s is sent (not
+  necessarily `PING`).
+- `RDATA` commands *usually* include a numeric token, however if the
+  stream has multiple rows to replicate per token the server will send
+  multiple `RDATA` commands, with all but the last having a token of
+  `batch`. See the documentation on `commands.RdataCommand` for
+  further details.
+
+## Architecture
+
+The basic structure of the protocol is line based, where the initial
+word of each line specifies the command. The rest of the line is parsed
+based on the command. For example, the RDATA command is defined as:
+
+    RDATA <stream_name> <token> <row>
+
+(Note that `<row>` may contain spaces, but cannot contain
+newlines.)
+
+Blank lines are ignored.
+
+### Keep alives
+
+Both sides are expected to send at least one command every 5s or so, and
+should send a `PING` command if necessary. If either side does not receive
+a command within e.g. 15s then the connection should be closed.
+
+Because the server may be connected to manually using e.g. netcat, the
+timeouts aren't enabled until an initial `PING` command is seen. Both
+the client and server implementations below send a `PING` command
+immediately on connection to ensure the timeouts are enabled.
+
+This ensures that both sides can quickly realize if the tcp connection
+has gone and handle the situation appropriately.
+
+### Start up
+
+When a new connection is made, the server:
+
+- Sends a `SERVER` command, which includes the identity of the server,
+  allowing the client to detect if it's connected to the expected
+  server
+- Sends a `PING` command as above, to enable the client to time out
+  connections promptly.
+
+The client:
+
+- Sends a `NAME` command, allowing the server to associate a human
+  friendly name with the connection. This is optional.
+- Sends a `PING` as above
+- For each stream the client wishes to subscribe to, it sends a
+  `REPLICATE` with the `stream_name` and token it wants to subscribe
+  from.
+- On receipt of a `SERVER` command, checks that the server name
+  matches the expected server name.
+
+### Error handling
+
+If either side detects an error it can send an `ERROR` command and close
+the connection.
+
+If the client side loses the connection to the server it should
+reconnect, following the steps above.
+
+### Congestion
+
+If the server sends messages faster than the client can consume them, the
+server will first buffer a (fairly large) number of commands and then
+disconnect the client. This ensures that we don't queue up an unbounded
+number of commands in memory and gives us a potential opportunity to
+squawk loudly. When/if the client recovers it can reconnect to the
+server and ask for missed messages.
+
+### Reliability
+
+In general the replication stream should be considered an unreliable
+transport since e.g. commands are not resent if the connection
+disappears.
+
+The exception to that are the replication streams, i.e. RDATA commands,
+since these include tokens which can be used to restart the stream on
+connection errors.
+
+The client should keep track of the token in the last RDATA command
+received for each stream so that on reconnection it can start streaming
+from the correct place. Note: not all RDATA have valid tokens due to
+batching. See `RdataCommand` for more details.
+
+### Example
+
+An example interaction is shown below. Each line is prefixed with '>'
+or '<' to indicate which side is sending; these are *not* included on
+the wire:
+
+    * connection established *
+    > SERVER localhost:8823
+    > PING 1490197665618
+    < NAME synapse.app.appservice
+    < PING 1490197665618
+    < REPLICATE events 1
+    < REPLICATE backfill 1
+    < REPLICATE caches 1
+    > POSITION events 1
+    > POSITION backfill 1
+    > POSITION caches 1
+    > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513]
+    > RDATA events 14 ["$149019767112vOHxz:localhost:8823",
+        "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null]
+    < PING 1490197675618
+    > ERROR server stopping
+    * connection closed by server *
+
+The `POSITION` command sent by the server is used to set the client's
+position without needing to send data with the `RDATA` command.
+
+An example of a batched set of `RDATA` is:
+
+    > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513]
+    > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513]
+    > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513]
+    > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513]
+
+In this case the client shouldn't advance their caches token until it
+sees the last `RDATA`.
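To make the batching rule concrete, here is a small illustrative sketch (not Synapse's actual client code; the function and variable names are invented for the example) of how a consumer might track per-stream tokens while honouring `batch` rows:

```python
import json

latest_tokens = {}   # stream name -> token of the last fully-received batch
pending_rows = {}    # stream name -> rows seen since the last real token

def process(stream, rows):
    # Placeholder for whatever the worker does with the rows (e.g. cache
    # invalidation for the `caches` stream).
    print("processing %d row(s) from %s" % (len(rows), stream))

def handle_rdata(line):
    # Lines look like: RDATA <stream_name> <token> <row>, as described above.
    command, stream, token, row = line.split(" ", 3)
    assert command == "RDATA"
    pending_rows.setdefault(stream, []).append(json.loads(row))
    if token != "batch":
        # Only the final command of a batch carries the real token, so only
        # now is it safe to process the rows and advance our position.
        process(stream, pending_rows.pop(stream))
        latest_tokens[stream] = int(token)

handle_rdata('RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513]')
handle_rdata('RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513]')
print(latest_tokens)  # {'caches': 54}
```

A real consumer would also persist these tokens so that, on reconnection, it can resume each stream from the right place, as described under Reliability above.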
+ +### List of commands + +The list of valid commands, with which side can send it: server (S) or +client (C): + +#### SERVER (S) + + Sent at the start to identify which server the client is talking to + +#### RDATA (S) + + A single update in a stream + +#### POSITION (S) + + The position of the stream has been updated. Sent to the client + after all missing updates for a stream have been sent to the client + and they're now up to date. + +#### ERROR (S, C) + + There was an error + +#### PING (S, C) + + Sent periodically to ensure the connection is still alive + +#### NAME (C) + + Sent at the start by client to inform the server who they are + +#### REPLICATE (C) + + Asks the server to replicate a given stream + +#### USER_SYNC (C) + + A user has started or stopped syncing + +#### FEDERATION_ACK (C) + + Acknowledge receipt of some federation data + +#### REMOVE_PUSHER (C) + + Inform the server a pusher should be removed + +#### INVALIDATE_CACHE (C) + + Inform the server a cache should be invalidated + +#### SYNC (S, C) + + Used exclusively in tests + +See `synapse/replication/tcp/commands.py` for a detailed description and +the format of each command. + +### Cache Invalidation Stream + +The cache invalidation stream is used to inform workers when they need +to invalidate any of their caches in the data store. This is done by +streaming all cache invalidations done on master down to the workers, +assuming that any caches on the workers also exist on the master. + +Each individual cache invalidation results in a row being sent down +replication, which includes the cache name (the name of the function) +and they key to invalidate. For example: + + > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251] + +However, there are times when a number of caches need to be invalidated +at the same time with the same key. To reduce traffic we batch those +invalidations into a single poke by defining a special cache name that +workers understand to mean to expand to invalidate the correct caches. + +Currently the special cache names are declared in +`synapse/storage/_base.py` and are: + +1. `cs_cache_fake` ─ invalidates caches that depend on the current + state diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst deleted file mode 100644 index 75e723484c..0000000000 --- a/docs/tcp_replication.rst +++ /dev/null @@ -1,249 +0,0 @@ -TCP Replication -=============== - -Motivation ----------- - -Previously the workers used an HTTP long poll mechanism to get updates from the -master, which had the problem of causing a lot of duplicate work on the server. -This TCP protocol replaces those APIs with the aim of increased efficiency. - - - -Overview --------- - -The protocol is based on fire and forget, line based commands. An example flow -would be (where '>' indicates master to worker and '<' worker to master flows):: - - > SERVER example.com - < REPLICATE events 53 - > RDATA events 54 ["$foo1:bar.com", ...] - > RDATA events 55 ["$foo4:bar.com", ...] - -The example shows the server accepting a new connection and sending its identity -with the ``SERVER`` command, followed by the client asking to subscribe to the -``events`` stream from the token ``53``. The server then periodically sends ``RDATA`` -commands which have the format ``RDATA ``, where the -format of ```` is defined by the individual streams. - -Error reporting happens by either the client or server sending an `ERROR` -command, and usually the connection will be closed. 
- - -Since the protocol is a simple line based, its possible to manually connect to -the server using a tool like netcat. A few things should be noted when manually -using the protocol: - -* When subscribing to a stream using ``REPLICATE``, the special token ``NOW`` can - be used to get all future updates. The special stream name ``ALL`` can be used - with ``NOW`` to subscribe to all available streams. -* The federation stream is only available if federation sending has been - disabled on the main process. -* The server will only time connections out that have sent a ``PING`` command. - If a ping is sent then the connection will be closed if no further commands - are receieved within 15s. Both the client and server protocol implementations - will send an initial PING on connection and ensure at least one command every - 5s is sent (not necessarily ``PING``). -* ``RDATA`` commands *usually* include a numeric token, however if the stream - has multiple rows to replicate per token the server will send multiple - ``RDATA`` commands, with all but the last having a token of ``batch``. See - the documentation on ``commands.RdataCommand`` for further details. - - -Architecture ------------- - -The basic structure of the protocol is line based, where the initial word of -each line specifies the command. The rest of the line is parsed based on the -command. For example, the `RDATA` command is defined as:: - - RDATA - -(Note that `` may contains spaces, but cannot contain newlines.) - -Blank lines are ignored. - - -Keep alives -~~~~~~~~~~~ - -Both sides are expected to send at least one command every 5s or so, and -should send a ``PING`` command if necessary. If either side do not receive a -command within e.g. 15s then the connection should be closed. - -Because the server may be connected to manually using e.g. netcat, the timeouts -aren't enabled until an initial ``PING`` command is seen. Both the client and -server implementations below send a ``PING`` command immediately on connection to -ensure the timeouts are enabled. - -This ensures that both sides can quickly realize if the tcp connection has gone -and handle the situation appropriately. - - -Start up -~~~~~~~~ - -When a new connection is made, the server: - -* Sends a ``SERVER`` command, which includes the identity of the server, allowing - the client to detect if its connected to the expected server -* Sends a ``PING`` command as above, to enable the client to time out connections - promptly. - -The client: - -* Sends a ``NAME`` command, allowing the server to associate a human friendly - name with the connection. This is optional. -* Sends a ``PING`` as above -* For each stream the client wishes to subscribe to it sends a ``REPLICATE`` - with the stream_name and token it wants to subscribe from. -* On receipt of a ``SERVER`` command, checks that the server name matches the - expected server name. - - -Error handling -~~~~~~~~~~~~~~ - -If either side detects an error it can send an ``ERROR`` command and close the -connection. - -If the client side loses the connection to the server it should reconnect, -following the steps above. - - -Congestion -~~~~~~~~~~ - -If the server sends messages faster than the client can consume them the server -will first buffer a (fairly large) number of commands and then disconnect the -client. This ensures that we don't queue up an unbounded number of commands in -memory and gives us a potential oppurtunity to squawk loudly. 
When/if the client -recovers it can reconnect to the server and ask for missed messages. - - -Reliability -~~~~~~~~~~~ - -In general the replication stream should be considered an unreliable transport -since e.g. commands are not resent if the connection disappears. - -The exception to that are the replication streams, i.e. RDATA commands, since -these include tokens which can be used to restart the stream on connection -errors. - -The client should keep track of the token in the last RDATA command received -for each stream so that on reconneciton it can start streaming from the correct -place. Note: not all RDATA have valid tokens due to batching. See -``RdataCommand`` for more details. - -Example -~~~~~~~ - -An example iteraction is shown below. Each line is prefixed with '>' or '<' to -indicate which side is sending, these are *not* included on the wire:: - - * connection established * - > SERVER localhost:8823 - > PING 1490197665618 - < NAME synapse.app.appservice - < PING 1490197665618 - < REPLICATE events 1 - < REPLICATE backfill 1 - < REPLICATE caches 1 - > POSITION events 1 - > POSITION backfill 1 - > POSITION caches 1 - > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513] - > RDATA events 14 ["$149019767112vOHxz:localhost:8823", - "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null] - < PING 1490197675618 - > ERROR server stopping - * connection closed by server * - -The ``POSITION`` command sent by the server is used to set the clients position -without needing to send data with the ``RDATA`` command. - - -An example of a batched set of ``RDATA`` is:: - - > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513] - > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513] - -In this case the client shouldn't advance their caches token until it sees the -the last ``RDATA``. - - -List of commands -~~~~~~~~~~~~~~~~ - -The list of valid commands, with which side can send it: server (S) or client (C): - -SERVER (S) - Sent at the start to identify which server the client is talking to - -RDATA (S) - A single update in a stream - -POSITION (S) - The position of the stream has been updated. Sent to the client after all - missing updates for a stream have been sent to the client and they're now - up to date. - -ERROR (S, C) - There was an error - -PING (S, C) - Sent periodically to ensure the connection is still alive - -NAME (C) - Sent at the start by client to inform the server who they are - -REPLICATE (C) - Asks the server to replicate a given stream - -USER_SYNC (C) - A user has started or stopped syncing - -FEDERATION_ACK (C) - Acknowledge receipt of some federation data - -REMOVE_PUSHER (C) - Inform the server a pusher should be removed - -INVALIDATE_CACHE (C) - Inform the server a cache should be invalidated - -SYNC (S, C) - Used exclusively in tests - - -See ``synapse/replication/tcp/commands.py`` for a detailed description and the -format of each command. - - -Cache Invalidation Stream -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The cache invalidation stream is used to inform workers when they need to -invalidate any of their caches in the data store. This is done by streaming all -cache invalidations done on master down to the workers, assuming that any caches -on the workers also exist on the master. 
-
-Each individual cache invalidation results in a row being sent down replication,
-which includes the cache name (the name of the function) and the key to
-invalidate. For example::
-
-    > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251]
-
-However, there are times when a number of caches need to be invalidated at the
-same time with the same key. To reduce traffic we batch those invalidations into
-a single poke by defining a special cache name that workers understand to mean
-to expand to invalidate the correct caches.
-
-Currently the special cache names are declared in ``synapse/storage/_base.py``
-and are:
-
-1. ``cs_cache_fake`` ─ invalidates caches that depend on the current state
diff --git a/docs/turn-howto.md b/docs/turn-howto.md
new file mode 100644
index 0000000000..4a983621e5
--- /dev/null
+++ b/docs/turn-howto.md
@@ -0,0 +1,123 @@
+# Overview
+
+This document explains how to enable VoIP relaying on your Home Server with
+TURN.
+
+The synapse Matrix Home Server supports integration with a TURN server via the
+[TURN server REST API](http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00). This
+allows the Home Server to generate credentials that are valid for use on the
+TURN server through the use of a secret shared between the Home Server and the
+TURN server.
+
+The following sections describe how to install
+[coturn](https://github.com/coturn/coturn) (which implements the TURN REST API)
+and integrate it with synapse.
+
+## `coturn` Setup
+
+### Initial installation
+
+The TURN daemon `coturn` is available from a variety of sources such as native
+package managers, or installation from source.
+
+#### Debian installation
+
+    # apt install coturn
+
+#### Source installation
+
+1. Download the [latest release](https://github.com/coturn/coturn/releases/latest)
+   from github. Unpack it and `cd` into the directory.
+
+1. Configure it:
+
+       ./configure
+
+   > You may need to install `libevent2`: if so, you should do so in
+   > the way recommended by your operating system. You can ignore
+   > warnings about lack of database support: a database is unnecessary
+   > for this purpose.
+
+1. Build and install it:
+
+       make
+       make install
+
+1. Create or edit the config file in `/etc/turnserver.conf`. The relevant
+   lines, with example values, are:
+
+       use-auth-secret
+       static-auth-secret=[your secret key here]
+       realm=turn.myserver.org
+
+   See `turnserver.conf` for explanations of the options. One way to generate
+   the `static-auth-secret` is with `pwgen`:
+
+       pwgen -s 64 1
+
+1. Consider your security settings. TURN lets users request a relay which will
+   connect to arbitrary IP addresses and ports. The following configuration is
+   suggested as a minimum starting point:
+
+       # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay.
+       no-tcp-relay
+
+       # don't let the relay ever try to connect to private IP address ranges within your network (if any)
+       # given the turn server is likely behind your firewall, remember to include any privileged public IPs too.
+       denied-peer-ip=10.0.0.0-10.255.255.255
+       denied-peer-ip=192.168.0.0-192.168.255.255
+       denied-peer-ip=172.16.0.0-172.31.255.255
+
+       # special case the turn server itself so that client->TURN->TURN->client flows work
+       allowed-peer-ip=10.0.0.1
+
+       # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS.
+       user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user.
+       total-quota=1200
+
+   Ideally coturn should refuse to relay traffic which isn't SRTP; see
+   <https://github.com/matrix-org/synapse/issues/2009>
+
+1.
Ensure your firewall allows traffic into the TURN server on the ports + you've configured it to listen on (remember to allow both TCP and UDP TURN + traffic) + +1. If you've configured coturn to support TLS/DTLS, generate or import your + private key and certificate. + +1. Start the turn server: + + bin/turnserver -o + +## synapse Setup + +Your home server configuration file needs the following extra keys: + +1. "`turn_uris`": This needs to be a yaml list of public-facing URIs + for your TURN server to be given out to your clients. Add separate + entries for each transport your TURN server supports. +2. "`turn_shared_secret`": This is the secret shared between your + Home server and your TURN server, so you should set it to the same + string you used in turnserver.conf. +3. "`turn_user_lifetime`": This is the amount of time credentials + generated by your Home Server are valid for (in milliseconds). + Shorter times offer less potential for abuse at the expense of + increased traffic between web clients and your home server to + refresh credentials. The TURN REST API specification recommends + one day (86400000). +4. "`turn_allow_guests`": Whether to allow guest users to use the + TURN server. This is enabled by default, as otherwise VoIP will + not work reliably for guests. However, it does introduce a + security risk as it lets guests connect to arbitrary endpoints + without having gone through a CAPTCHA or similar to register a + real account. + +As an example, here is the relevant section of the config file for matrix.org: + + turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ] + turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons + turn_user_lifetime: 86400000 + turn_allow_guests: True + +After updating the homeserver configuration, you must restart synapse: + + cd /where/you/run/synapse + ./synctl restart + +..and your Home Server now supports VoIP relaying! diff --git a/docs/turn-howto.rst b/docs/turn-howto.rst deleted file mode 100644 index a2fc5c8820..0000000000 --- a/docs/turn-howto.rst +++ /dev/null @@ -1,127 +0,0 @@ -How to enable VoIP relaying on your Home Server with TURN - -Overview --------- -The synapse Matrix Home Server supports integration with TURN server via the -TURN server REST API -(http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00). This allows -the Home Server to generate credentials that are valid for use on the TURN -server through the use of a secret shared between the Home Server and the -TURN server. - -This document describes how to install coturn -(https://github.com/coturn/coturn) which also supports the TURN REST API, -and integrate it with synapse. - -coturn Setup -============ - -You may be able to setup coturn via your package manager, or set it up manually using the usual ``configure, make, make install`` process. - - 1. Check out coturn:: - - git clone https://github.com/coturn/coturn.git coturn - cd coturn - - 2. Configure it:: - - ./configure - - You may need to install ``libevent2``: if so, you should do so - in the way recommended by your operating system. - You can ignore warnings about lack of database support: a - database is unnecessary for this purpose. - - 3. Build and install it:: - - make - make install - - 4. Create or edit the config file in ``/etc/turnserver.conf``. The relevant - lines, with example values, are:: - - use-auth-secret - static-auth-secret=[your secret key here] - realm=turn.myserver.org - - See turnserver.conf for explanations of the options. 
- One way to generate the static-auth-secret is with pwgen:: - - pwgen -s 64 1 - - 5. Consider your security settings. TURN lets users request a relay - which will connect to arbitrary IP addresses and ports. At the least - we recommend:: - - # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay. - no-tcp-relay - - # don't let the relay ever try to connect to private IP address ranges within your network (if any) - # given the turn server is likely behind your firewall, remember to include any privileged public IPs too. - denied-peer-ip=10.0.0.0-10.255.255.255 - denied-peer-ip=192.168.0.0-192.168.255.255 - denied-peer-ip=172.16.0.0-172.31.255.255 - - # special case the turn server itself so that client->TURN->TURN->client flows work - allowed-peer-ip=10.0.0.1 - - # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS. - user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user. - total-quota=1200 - - Ideally coturn should refuse to relay traffic which isn't SRTP; - see https://github.com/matrix-org/synapse/issues/2009 - - 6. Ensure your firewall allows traffic into the TURN server on - the ports you've configured it to listen on (remember to allow - both TCP and UDP TURN traffic) - - 7. If you've configured coturn to support TLS/DTLS, generate or - import your private key and certificate. - - 8. Start the turn server:: - - bin/turnserver -o - - -synapse Setup -============= - -Your home server configuration file needs the following extra keys: - - 1. "turn_uris": This needs to be a yaml list - of public-facing URIs for your TURN server to be given out - to your clients. Add separate entries for each transport your - TURN server supports. - - 2. "turn_shared_secret": This is the secret shared between your Home - server and your TURN server, so you should set it to the same - string you used in turnserver.conf. - - 3. "turn_user_lifetime": This is the amount of time credentials - generated by your Home Server are valid for (in milliseconds). - Shorter times offer less potential for abuse at the expense - of increased traffic between web clients and your home server - to refresh credentials. The TURN REST API specification recommends - one day (86400000). - - 4. "turn_allow_guests": Whether to allow guest users to use the TURN - server. This is enabled by default, as otherwise VoIP will not - work reliably for guests. However, it does introduce a security risk - as it lets guests connect to arbitrary endpoints without having gone - through a CAPTCHA or similar to register a real account. - -As an example, here is the relevant section of the config file for -matrix.org:: - - turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ] - turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons - turn_user_lifetime: 86400000 - turn_allow_guests: True - -Now, restart synapse:: - - cd /where/you/run/synapse - ./synctl restart - -...and your Home Server now supports VoIP relaying! diff --git a/docs/workers.md b/docs/workers.md new file mode 100644 index 0000000000..4bd60ba0a0 --- /dev/null +++ b/docs/workers.md @@ -0,0 +1,284 @@ +# Scaling synapse via workers + +Synapse has experimental support for splitting out functionality into +multiple separate python processes, helping greatly with scalability. 
These +processes are called 'workers', and are (eventually) intended to scale +horizontally independently. + +All of the below is highly experimental and subject to change as Synapse evolves, +but documenting it here to help folks needing highly scalable Synapses similar +to the one running matrix.org! + +All processes continue to share the same database instance, and as such, workers +only work with postgres based synapse deployments (sharing a single sqlite +across multiple processes is a recipe for disaster, plus you should be using +postgres anyway if you care about scalability). + +The workers communicate with the master synapse process via a synapse-specific +TCP protocol called 'replication' - analogous to MySQL or Postgres style +database replication; feeding a stream of relevant data to the workers so they +can be kept in sync with the main synapse process and database state. + +## Configuration + +To make effective use of the workers, you will need to configure an HTTP +reverse-proxy such as nginx or haproxy, which will direct incoming requests to +the correct worker, or to the main synapse instance. Note that this includes +requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md) +for information on setting up a reverse proxy. + +To enable workers, you need to add two replication listeners to the master +synapse, e.g.: + + listeners: + # The TCP replication port + - port: 9092 + bind_address: '127.0.0.1' + type: replication + # The HTTP replication port + - port: 9093 + bind_address: '127.0.0.1' + type: http + resources: + - names: [replication] + +Under **no circumstances** should these replication API listeners be exposed to +the public internet; it currently implements no authentication whatsoever and is +unencrypted. + +(Roughly, the TCP port is used for streaming data from the master to the +workers, and the HTTP port for the workers to send data to the main +synapse process.) + +You then create a set of configs for the various worker processes. These +should be worker configuration files, and should be stored in a dedicated +subdirectory, to allow synctl to manipulate them. An additional configuration +for the master synapse process will need to be created because the process will +not be started automatically. That configuration should look like this: + + worker_app: synapse.app.homeserver + daemonize: true + +Each worker configuration file inherits the configuration of the main homeserver +configuration file. You can then override configuration specific to that worker, +e.g. the HTTP listener that it provides (if any); logging configuration; etc. +You should minimise the number of overrides though to maintain a usable config. + +You must specify the type of worker application (`worker_app`). The currently +available worker applications are listed below. You must also specify the +replication endpoints that it's talking to on the main synapse process. +`worker_replication_host` should specify the host of the main synapse, +`worker_replication_port` should point to the TCP replication listener port and +`worker_replication_http_port` should point to the HTTP replication port. + +Currently, the `event_creator` and `federation_reader` workers require specifying +`worker_replication_http_port`. + +For instance: + + worker_app: synapse.app.synchrotron + + # The replication listener on the synapse to talk to. 
+ worker_replication_host: 127.0.0.1 + worker_replication_port: 9092 + worker_replication_http_port: 9093 + + worker_listeners: + - type: http + port: 8083 + resources: + - names: + - client + + worker_daemonize: True + worker_pid_file: /home/matrix/synapse/synchrotron.pid + worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml + +...is a full configuration for a synchrotron worker instance, which will expose a +plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided +by the main synapse. + +Obviously you should configure your reverse-proxy to route the relevant +endpoints to the worker (`localhost:8083` in the above example). + +Finally, to actually run your worker-based synapse, you must pass synctl the -a +commandline option to tell it to operate on all the worker configurations found +in the given directory, e.g.: + + synctl -a $CONFIG/workers start + +Currently one should always restart all workers when restarting or upgrading +synapse, unless you explicitly know it's safe not to. For instance, restarting +synapse without restarting all the synchrotrons may result in broken typing +notifications. + +To manipulate a specific worker, you pass the -w option to synctl: + + synctl -w $CONFIG/workers/synchrotron.yaml restart + +## Available worker applications + +### `synapse.app.pusher` + +Handles sending push notifications to sygnal and email. Doesn't handle any +REST endpoints itself, but you should set `start_pushers: False` in the +shared configuration file to stop the main synapse sending these notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.synchrotron` + +The synchrotron handles `sync` requests from clients. In particular, it can +handle REST endpoints matching the following regular expressions: + + ^/_matrix/client/(v2_alpha|r0)/sync$ + ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ + ^/_matrix/client/(api/v1|r0)/initialSync$ + ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ + +The above endpoints should all be routed to the synchrotron worker by the +reverse-proxy configuration. + +It is possible to run multiple instances of the synchrotron to scale +horizontally. In this case the reverse-proxy should be configured to +load-balance across the instances, though it will be more efficient if all +requests from a particular user are routed to a single instance. Extracting +a userid from the access token is currently left as an exercise for the reader. + +### `synapse.app.appservice` + +Handles sending output traffic to Application Services. Doesn't handle any +REST endpoints itself, but you should set `notify_appservices: False` in the +shared configuration file to stop the main synapse sending these notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.federation_reader` + +Handles a subset of federation endpoints. 
In particular, it can handle REST +endpoints matching the following regular expressions: + + ^/_matrix/federation/v1/event/ + ^/_matrix/federation/v1/state/ + ^/_matrix/federation/v1/state_ids/ + ^/_matrix/federation/v1/backfill/ + ^/_matrix/federation/v1/get_missing_events/ + ^/_matrix/federation/v1/publicRooms + ^/_matrix/federation/v1/query/ + ^/_matrix/federation/v1/make_join/ + ^/_matrix/federation/v1/make_leave/ + ^/_matrix/federation/v1/send_join/ + ^/_matrix/federation/v1/send_leave/ + ^/_matrix/federation/v1/invite/ + ^/_matrix/federation/v1/query_auth/ + ^/_matrix/federation/v1/event_auth/ + ^/_matrix/federation/v1/exchange_third_party_invite/ + ^/_matrix/federation/v1/send/ + ^/_matrix/key/v2/query + +The above endpoints should all be routed to the federation_reader worker by the +reverse-proxy configuration. + +The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single +instance. + +### `synapse.app.federation_sender` + +Handles sending federation traffic to other servers. Doesn't handle any +REST endpoints itself, but you should set `send_federation: False` in the +shared configuration file to stop the main synapse sending this traffic. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.media_repository` + +Handles the media repository. It can handle all endpoints starting with: + + /_matrix/media/ + +And the following regular expressions matching media-specific administration APIs: + + ^/_synapse/admin/v1/purge_media_cache$ + ^/_synapse/admin/v1/room/.*/media$ + ^/_synapse/admin/v1/quarantine_media/.*$ + +You should also set `enable_media_repo: False` in the shared configuration +file to stop the main synapse running background jobs related to managing the +media repository. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.client_reader` + +Handles client API endpoints. It can handle REST endpoints matching the +following regular expressions: + + ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ + ^/_matrix/client/(api/v1|r0|unstable)/login$ + ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ + ^/_matrix/client/versions$ + ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ + +Additionally, the following REST endpoints can be handled for GET requests: + + ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ + +Additionally, the following REST endpoints can be handled, but all requests must +be routed to the same instance: + + ^/_matrix/client/(r0|unstable)/register$ + +Pagination requests can also be handled, but all requests with the same path +room must be routed to the same instance. Additionally, care must be taken to +ensure that the purge history admin API is not used while pagination requests +for the room are in flight: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ + +### `synapse.app.user_dir` + +Handles searches in the user directory. 
It can handle REST endpoints matching +the following regular expressions: + + ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$ + +### `synapse.app.frontend_proxy` + +Proxies some frequently-requested client endpoints to add caching and remove +load from the main synapse. It can handle REST endpoints matching the following +regular expressions: + + ^/_matrix/client/(api/v1|r0|unstable)/keys/upload + +If `use_presence` is False in the homeserver config, it can also handle REST +endpoints matching the following regular expressions: + + ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status + +This "stub" presence handler will pass through `GET` request but make the +`PUT` effectively a no-op. + +It will proxy any requests it cannot handle to the main synapse instance. It +must therefore be configured with the location of the main instance, via +the `worker_main_http_uri` setting in the `frontend_proxy` worker configuration +file. For example: + + worker_main_http_uri: http://127.0.0.1:8008 + +### `synapse.app.event_creator` + +Handles some event creation. It can handle REST endpoints matching: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ + ^/_matrix/client/(api/v1|r0|unstable)/join/ + ^/_matrix/client/(api/v1|r0|unstable)/profile/ + +It will create events locally and then send them on to the main synapse +instance to be persisted and handled. diff --git a/docs/workers.rst b/docs/workers.rst deleted file mode 100644 index e11e117418..0000000000 --- a/docs/workers.rst +++ /dev/null @@ -1,301 +0,0 @@ -Scaling synapse via workers -=========================== - -Synapse has experimental support for splitting out functionality into -multiple separate python processes, helping greatly with scalability. These -processes are called 'workers', and are (eventually) intended to scale -horizontally independently. - -All of the below is highly experimental and subject to change as Synapse evolves, -but documenting it here to help folks needing highly scalable Synapses similar -to the one running matrix.org! - -All processes continue to share the same database instance, and as such, workers -only work with postgres based synapse deployments (sharing a single sqlite -across multiple processes is a recipe for disaster, plus you should be using -postgres anyway if you care about scalability). - -The workers communicate with the master synapse process via a synapse-specific -TCP protocol called 'replication' - analogous to MySQL or Postgres style -database replication; feeding a stream of relevant data to the workers so they -can be kept in sync with the main synapse process and database state. - -Configuration -------------- - -To make effective use of the workers, you will need to configure an HTTP -reverse-proxy such as nginx or haproxy, which will direct incoming requests to -the correct worker, or to the main synapse instance. Note that this includes -requests made to the federation port. See ``_ for -information on setting up a reverse proxy. 
- -To enable workers, you need to add two replication listeners to the master -synapse, e.g.:: - - listeners: - # The TCP replication port - - port: 9092 - bind_address: '127.0.0.1' - type: replication - # The HTTP replication port - - port: 9093 - bind_address: '127.0.0.1' - type: http - resources: - - names: [replication] - -Under **no circumstances** should these replication API listeners be exposed to -the public internet; it currently implements no authentication whatsoever and is -unencrypted. - -(Roughly, the TCP port is used for streaming data from the master to the -workers, and the HTTP port for the workers to send data to the main -synapse process.) - -You then create a set of configs for the various worker processes. These -should be worker configuration files, and should be stored in a dedicated -subdirectory, to allow synctl to manipulate them. An additional configuration -for the master synapse process will need to be created because the process will -not be started automatically. That configuration should look like this:: - - worker_app: synapse.app.homeserver - daemonize: true - -Each worker configuration file inherits the configuration of the main homeserver -configuration file. You can then override configuration specific to that worker, -e.g. the HTTP listener that it provides (if any); logging configuration; etc. -You should minimise the number of overrides though to maintain a usable config. - -You must specify the type of worker application (``worker_app``). The currently -available worker applications are listed below. You must also specify the -replication endpoints that it's talking to on the main synapse process. -``worker_replication_host`` should specify the host of the main synapse, -``worker_replication_port`` should point to the TCP replication listener port and -``worker_replication_http_port`` should point to the HTTP replication port. - -Currently, the ``event_creator`` and ``federation_reader`` workers require specifying -``worker_replication_http_port``. - -For instance:: - - worker_app: synapse.app.synchrotron - - # The replication listener on the synapse to talk to. - worker_replication_host: 127.0.0.1 - worker_replication_port: 9092 - worker_replication_http_port: 9093 - - worker_listeners: - - type: http - port: 8083 - resources: - - names: - - client - - worker_daemonize: True - worker_pid_file: /home/matrix/synapse/synchrotron.pid - worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml - -...is a full configuration for a synchrotron worker instance, which will expose a -plain HTTP ``/sync`` endpoint on port 8083 separately from the ``/sync`` endpoint provided -by the main synapse. - -Obviously you should configure your reverse-proxy to route the relevant -endpoints to the worker (``localhost:8083`` in the above example). - -Finally, to actually run your worker-based synapse, you must pass synctl the -a -commandline option to tell it to operate on all the worker configurations found -in the given directory, e.g.:: - - synctl -a $CONFIG/workers start - -Currently one should always restart all workers when restarting or upgrading -synapse, unless you explicitly know it's safe not to. For instance, restarting -synapse without restarting all the synchrotrons may result in broken typing -notifications. 
- -To manipulate a specific worker, you pass the -w option to synctl:: - - synctl -w $CONFIG/workers/synchrotron.yaml restart - - -Available worker applications ------------------------------ - -``synapse.app.pusher`` -~~~~~~~~~~~~~~~~~~~~~~ - -Handles sending push notifications to sygnal and email. Doesn't handle any -REST endpoints itself, but you should set ``start_pushers: False`` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.synchrotron`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The synchrotron handles ``sync`` requests from clients. In particular, it can -handle REST endpoints matching the following regular expressions:: - - ^/_matrix/client/(v2_alpha|r0)/sync$ - ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ - ^/_matrix/client/(api/v1|r0)/initialSync$ - ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ - -The above endpoints should all be routed to the synchrotron worker by the -reverse-proxy configuration. - -It is possible to run multiple instances of the synchrotron to scale -horizontally. In this case the reverse-proxy should be configured to -load-balance across the instances, though it will be more efficient if all -requests from a particular user are routed to a single instance. Extracting -a userid from the access token is currently left as an exercise for the reader. - -``synapse.app.appservice`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles sending output traffic to Application Services. Doesn't handle any -REST endpoints itself, but you should set ``notify_appservices: False`` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.federation_reader`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles a subset of federation endpoints. In particular, it can handle REST -endpoints matching the following regular expressions:: - - ^/_matrix/federation/v1/event/ - ^/_matrix/federation/v1/state/ - ^/_matrix/federation/v1/state_ids/ - ^/_matrix/federation/v1/backfill/ - ^/_matrix/federation/v1/get_missing_events/ - ^/_matrix/federation/v1/publicRooms - ^/_matrix/federation/v1/query/ - ^/_matrix/federation/v1/make_join/ - ^/_matrix/federation/v1/make_leave/ - ^/_matrix/federation/v1/send_join/ - ^/_matrix/federation/v1/send_leave/ - ^/_matrix/federation/v1/invite/ - ^/_matrix/federation/v1/query_auth/ - ^/_matrix/federation/v1/event_auth/ - ^/_matrix/federation/v1/exchange_third_party_invite/ - ^/_matrix/federation/v1/send/ - ^/_matrix/key/v2/query - -The above endpoints should all be routed to the federation_reader worker by the -reverse-proxy configuration. - -The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single -instance. - -``synapse.app.federation_sender`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles sending federation traffic to other servers. Doesn't handle any -REST endpoints itself, but you should set ``send_federation: False`` in the -shared configuration file to stop the main synapse sending this traffic. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.media_repository`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles the media repository. 
It can handle all endpoints starting with:: - - /_matrix/media/ - -And the following regular expressions matching media-specific administration -APIs:: - - ^/_synapse/admin/v1/purge_media_cache$ - ^/_synapse/admin/v1/room/.*/media$ - ^/_synapse/admin/v1/quarantine_media/.*$ - -You should also set ``enable_media_repo: False`` in the shared configuration -file to stop the main synapse running background jobs related to managing the -media repository. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.client_reader`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles client API endpoints. It can handle REST endpoints matching the -following regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ - ^/_matrix/client/(api/v1|r0|unstable)/login$ - ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ - ^/_matrix/client/versions$ - ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ - -Additionally, the following REST endpoints can be handled for GET requests:: - - ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ - -Additionally, the following REST endpoints can be handled, but all requests must -be routed to the same instance:: - - ^/_matrix/client/(r0|unstable)/register$ - -Pagination requests can also be handled, but all requests with the same path -room must be routed to the same instance. Additionally, care must be taken to -ensure that the purge history admin API is not used while pagination requests -for the room are in flight:: - - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ - - -``synapse.app.user_dir`` -~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles searches in the user directory. It can handle REST endpoints matching -the following regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$ - -``synapse.app.frontend_proxy`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Proxies some frequently-requested client endpoints to add caching and remove -load from the main synapse. It can handle REST endpoints matching the following -regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/keys/upload - -If ``use_presence`` is False in the homeserver config, it can also handle REST -endpoints matching the following regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status - -This "stub" presence handler will pass through ``GET`` request but make the -``PUT`` effectively a no-op. - -It will proxy any requests it cannot handle to the main synapse instance. It -must therefore be configured with the location of the main instance, via -the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration -file. For example:: - - worker_main_http_uri: http://127.0.0.1:8008 - - -``synapse.app.event_creator`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles some event creation. 
It can handle REST endpoints matching:: - - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ - ^/_matrix/client/(api/v1|r0|unstable)/join/ - ^/_matrix/client/(api/v1|r0|unstable)/profile/ - -It will create events locally and then send them on to the main synapse -instance to be persisted and handled. diff --git a/synapse/config/server.py b/synapse/config/server.py index c8b9fe2d0f..7f8d315954 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -338,7 +338,7 @@ class ServerConfig(Config): ( "The metrics_port configuration option is deprecated in Synapse 0.31 " "in favour of a listener. Please see " - "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.rst" + "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md" " on how to configure the new listener." ) ) @@ -571,8 +571,8 @@ class ServerConfig(Config): # # type: the type of listener. Normally 'http', but other valid options are: # 'manhole' (see docs/manhole.md), - # 'metrics' (see docs/metrics-howto.rst), - # 'replication' (see docs/workers.rst). + # 'metrics' (see docs/metrics-howto.md), + # 'replication' (see docs/workers.md). # # tls: set to true to enable TLS for this listener. Will use the TLS # key/cert specified in tls_private_key_path / tls_certificate_path. @@ -607,12 +607,12 @@ class ServerConfig(Config): # # media: the media API (/_matrix/media). # - # metrics: the metrics interface. See docs/metrics-howto.rst. + # metrics: the metrics interface. See docs/metrics-howto.md. # # openid: OpenID authentication. # # replication: the HTTP replication API (/_synapse/replication). See - # docs/workers.rst. + # docs/workers.md. # # static: static resources under synapse/static (/_matrix/static). (Mostly # useful for 'fallback authentication'.) @@ -632,7 +632,7 @@ class ServerConfig(Config): # that unwraps TLS. # # If you plan to use a reverse proxy, please see - # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md. # %(unsecure_http_bindings)s -- cgit 1.4.1 From 6670bd407201f331353a4d402369da75b61ceca9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 17 Sep 2019 18:05:13 +0100 Subject: v2 3PID Invites (part of MSC2140) (#5979) 3PID invites require making a request to an identity server to check that the invited 3PID has an Matrix ID linked, and if so, what it is. These requests are being made on behalf of a user. The user will supply an identity server and an access token for that identity server. The homeserver will then forward this request with the access token (using an `Authorization` header) and, if the given identity server doesn't support v2 endpoints, will fall back to v1 (which doesn't require any access tokens). Requires: ~~#5976~~ --- changelog.d/5979.feature | 1 + synapse/handlers/room_member.py | 104 +++++++++++++++++++++++++++++++--------- 2 files changed, 82 insertions(+), 23 deletions(-) create mode 100644 changelog.d/5979.feature (limited to 'synapse') diff --git a/changelog.d/5979.feature b/changelog.d/5979.feature new file mode 100644 index 0000000000..94888aa2d3 --- /dev/null +++ b/changelog.d/5979.feature @@ -0,0 +1 @@ +Use the v2 Identity Service API for 3PID invites. 
\ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 43d10a5308..35450feb6f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -684,7 +684,14 @@ class RoomMemberHandler(object): ) else: yield self._make_and_store_3pid_invite( - requester, id_server, medium, address, room_id, inviter, txn_id=txn_id + requester, + id_server, + medium, + address, + room_id, + inviter, + txn_id=txn_id, + id_access_token=id_access_token, ) @defer.inlineCallbacks @@ -885,7 +892,15 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _make_and_store_3pid_invite( - self, requester, id_server, medium, address, room_id, user, txn_id + self, + requester, + id_server, + medium, + address, + room_id, + user, + txn_id, + id_access_token=None, ): room_state = yield self.state_handler.get_current_state(room_id) @@ -934,6 +949,7 @@ class RoomMemberHandler(object): room_name=room_name, inviter_display_name=inviter_display_name, inviter_avatar_url=inviter_avatar_url, + id_access_token=id_access_token, ) ) @@ -971,6 +987,7 @@ class RoomMemberHandler(object): room_name, inviter_display_name, inviter_avatar_url, + id_access_token=None, ): """ Asks an identity server for a third party invite. @@ -990,6 +1007,8 @@ class RoomMemberHandler(object): inviter_display_name (str): The current display name of the inviter. inviter_avatar_url (str): The URL of the inviter's avatar. + id_access_token (str|None): The access token to authenticate to the identity + server with Returns: A deferred tuple containing: @@ -1000,11 +1019,6 @@ class RoomMemberHandler(object): display_name (str): A user-friendly name to represent the invited user. """ - is_url = "%s%s/_matrix/identity/api/v1/store-invite" % ( - id_server_scheme, - id_server, - ) - invite_config = { "medium": medium, "address": address, @@ -1017,22 +1031,67 @@ class RoomMemberHandler(object): "sender_display_name": inviter_display_name, "sender_avatar_url": inviter_avatar_url, } - try: - data = yield self.simple_http_client.post_json_get_json( - is_url, invite_config - ) - except HttpResponseException as e: - # Some identity servers may only support application/x-www-form-urlencoded - # types. 
This is especially true with old instances of Sydent, see - # https://github.com/matrix-org/sydent/pull/170 - logger.info( - "Failed to POST %s with JSON, falling back to urlencoded form: %s", - is_url, - e, + + # Add the identity service access token to the JSON body and use the v2 + # Identity Service endpoints if id_access_token is present + data = None + base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) + + if id_access_token: + key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( + id_server_scheme, + id_server, ) - data = yield self.simple_http_client.post_urlencoded_get_json( - is_url, invite_config + + # Attempt a v2 lookup + url = base_url + "/v2/store-invite" + try: + data = yield self.simple_http_client.post_json_get_json( + url, + invite_config, + {"Authorization": create_id_access_token_header(id_access_token)}, + ) + except HttpResponseException as e: + if e.code != 404: + logger.info("Failed to POST %s with JSON: %s", url, e) + raise e + + if data is None: + key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( + id_server_scheme, + id_server, ) + url = base_url + "/api/v1/store-invite" + + try: + data = yield self.simple_http_client.post_json_get_json( + url, invite_config + ) + except HttpResponseException as e: + logger.warning( + "Error trying to call /store-invite on %s%s: %s", + id_server_scheme, + id_server, + e, + ) + + if data is None: + # Some identity servers may only support application/x-www-form-urlencoded + # types. This is especially true with old instances of Sydent, see + # https://github.com/matrix-org/sydent/pull/170 + try: + data = yield self.simple_http_client.post_urlencoded_get_json( + url, invite_config + ) + except HttpResponseException as e: + logger.warning( + "Error calling /store-invite on %s%s with fallback " + "encoding: %s", + id_server_scheme, + id_server, + e, + ) + raise e # TODO: Check for success token = data["token"] @@ -1040,8 +1099,7 @@ class RoomMemberHandler(object): if "public_key" in data: fallback_public_key = { "public_key": data["public_key"], - "key_validity_url": "%s%s/_matrix/identity/api/v1/pubkey/isvalid" - % (id_server_scheme, id_server), + "key_validity_url": key_validity_url, } else: fallback_public_key = public_keys[0] -- cgit 1.4.1 From a86a290850dad40c1ac38c4e20b2da039f246922 Mon Sep 17 00:00:00 2001 From: "J. Ryan Stinnett" Date: Wed, 18 Sep 2019 21:55:37 +0100 Subject: Fix logcontext spam on non-Linux platforms (#6059) This checks whether the current platform supports thread resource usage tracking before logging a warning to avoid log spam. Fixes https://github.com/matrix-org/synapse/issues/6055 --- changelog.d/6059.bugfix | 1 + synapse/logging/context.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6059.bugfix (limited to 'synapse') diff --git a/changelog.d/6059.bugfix b/changelog.d/6059.bugfix new file mode 100644 index 0000000000..49d5bd3fa0 --- /dev/null +++ b/changelog.d/6059.bugfix @@ -0,0 +1 @@ +Fix logcontext spam on non-Linux platforms. diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 63379bfb93..370000e377 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -42,13 +43,17 @@ try: # exception. 
resource.getrusage(RUSAGE_THREAD) + is_thread_resource_usage_supported = True + def get_thread_resource_usage(): return resource.getrusage(RUSAGE_THREAD) except Exception: # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we - # won't track resource usage by returning None. + # won't track resource usage. + is_thread_resource_usage_supported = False + def get_thread_resource_usage(): return None @@ -359,7 +364,11 @@ class LoggingContext(object): # When we stop, let's record the cpu used since we started if not self.usage_start: - logger.warning("Called stop on logcontext %s without calling start", self) + # Log a warning on platforms that support thread usage tracking + if is_thread_resource_usage_supported: + logger.warning( + "Called stop on logcontext %s without calling start", self + ) return utime_delta, stime_delta = self._get_cputime() -- cgit 1.4.1 From 38fd1f8e3faeffbd4bb3084012bb2c17a953625f Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Wed, 18 Sep 2019 22:30:44 +0100 Subject: Fix typo in account_threepid_delegates config (#6028) --- changelog.d/6028.feature | 1 + docs/sample_config.yaml | 2 +- synapse/config/registration.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6028.feature (limited to 'synapse') diff --git a/changelog.d/6028.feature b/changelog.d/6028.feature new file mode 100644 index 0000000000..cf603fa0c6 --- /dev/null +++ b/changelog.d/6028.feature @@ -0,0 +1 @@ +Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 1ee0ba8c30..3e4edc6b0b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -938,7 +938,7 @@ uploads_path: "DATADIR/uploads" # https://matrix.org/docs/spec/identity_service/latest # account_threepid_delegates: - #email: https://example.com # Delegate email sending to matrix.org + #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process # Users who register on this homeserver will automatically be joined diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 9548560edb..d4654e99b3 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -294,7 +294,7 @@ class RegistrationConfig(Config): # https://matrix.org/docs/spec/identity_service/latest # account_threepid_delegates: - #email: https://example.com # Delegate email sending to matrix.org + #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process # Users who register on this homeserver will automatically be joined -- cgit 1.4.1 From 62e3ff92fd3228b5c34f6cee691e22f9b1f85c9e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 19 Sep 2019 10:53:14 +0100 Subject: Remove POST method from password reset submit_token endpoint (#6056) Removes the POST method from `/password_reset//submit_token/` as it's only used by phone number verification which Synapse does not support yet. 
--- changelog.d/6056.bugfix | 1 + synapse/rest/client/v2_alpha/account.py | 17 ----------------- 2 files changed, 1 insertion(+), 17 deletions(-) create mode 100644 changelog.d/6056.bugfix (limited to 'synapse') diff --git a/changelog.d/6056.bugfix b/changelog.d/6056.bugfix new file mode 100644 index 0000000000..4d9573a58d --- /dev/null +++ b/changelog.d/6056.bugfix @@ -0,0 +1 @@ +Remove POST method from password reset submit_token endpoint until we implement submit_url functionality. \ No newline at end of file diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 2ea515d2f6..afaaeeacdd 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -272,23 +272,6 @@ class PasswordResetSubmitTokenServlet(RestServlet): request.write(html.encode("utf-8")) finish_request(request) - @defer.inlineCallbacks - def on_POST(self, request, medium): - if medium != "email": - raise SynapseError( - 400, "This medium is currently not supported for password resets" - ) - - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["sid", "client_secret", "token"]) - - valid, _ = yield self.store.validate_threepid_session( - body["sid"], body["client_secret"], body["token"], self.clock.time_msec() - ) - response_code = 200 if valid else 400 - - return response_code, {"success": valid} - class PasswordRestServlet(RestServlet): PATTERNS = client_patterns("/account/password$") -- cgit 1.4.1 From bcd91328692555d85df346c4571085c9b41b8f6a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 19 Sep 2019 15:06:27 +0100 Subject: Undo the deletion of some tables (#6047) This is a partial revert of #5893. The problem is that if we drop these tables in the same release as removing the code that writes to them, it prevents users users from being able to roll back to a previous release. So let's leave the tables in place for now, and remember to drop them in a subsequent release. (Note that these tables haven't been *read* for *years*, so any missing rows resulting from a temporary upgrade to vNext won't cause a problem.) --- changelog.d/5893.misc | 2 +- changelog.d/6047.misc | 2 ++ .../schema/delta/56/drop_unused_event_tables.sql | 20 -------------------- 3 files changed, 3 insertions(+), 21 deletions(-) create mode 100644 changelog.d/6047.misc delete mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql (limited to 'synapse') diff --git a/changelog.d/5893.misc b/changelog.d/5893.misc index 07ee4888dc..5ef171cb3e 100644 --- a/changelog.d/5893.misc +++ b/changelog.d/5893.misc @@ -1 +1 @@ -Drop some unused tables. +Stop populating some unused tables. diff --git a/changelog.d/6047.misc b/changelog.d/6047.misc new file mode 100644 index 0000000000..a4cdb8abb3 --- /dev/null +++ b/changelog.d/6047.misc @@ -0,0 +1,2 @@ +Stop populating some unused tables. + diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql deleted file mode 100644 index 9f09922c67..0000000000 --- a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright 2019 The Matrix.org Foundation C.I.C. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- these tables are never used. -DROP TABLE IF EXISTS room_names; -DROP TABLE IF EXISTS topics; -DROP TABLE IF EXISTS history_visibility; -DROP TABLE IF EXISTS guest_access; -- cgit 1.4.1 From 7423fade92d98d4246947bc8491af5827cd9e0dd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Sep 2019 17:16:50 +0100 Subject: better logging --- synapse/handlers/saml_handler.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse') diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index 5fa8272dc9..f000d2a00f 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -111,6 +111,8 @@ class SamlHandler: logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") + logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava) + try: remote_user_id = saml2_auth.ava["uid"][0] except KeyError: -- cgit 1.4.1 From b74606ea2262a717193f08bb6876459c1ee2d97d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Sep 2019 20:29:11 +0100 Subject: Fix a bug with saml attribute maps. Fixes a bug where the default attribute maps were prioritised over user-specified ones, resulting in incorrect mappings. The problem is that if you call SPConfig.load() multiple times, it adds new attribute mappers to a list. So by calling it with the default config first, and then the user-specified config, we would always get the default mappers before the user-specified mappers. To solve this, let's merge the config dicts first, and then pass them to SPConfig. --- changelog.d/6069.bugfix | 1 + synapse/config/saml2_config.py | 34 ++++++++++++++++++++++++++++------ synapse/util/module_loader.py | 20 +++++++++++++++++++- 3 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 changelog.d/6069.bugfix (limited to 'synapse') diff --git a/changelog.d/6069.bugfix b/changelog.d/6069.bugfix new file mode 100644 index 0000000000..a437ac41a9 --- /dev/null +++ b/changelog.d/6069.bugfix @@ -0,0 +1 @@ +Fix a bug which caused SAML attribute maps to be overridden by defaults. diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index 6a8161547a..14539fdb2a 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,11 +13,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from synapse.python_dependencies import DependencyException, check_requirements +from synapse.util.module_loader import load_python_module from ._base import Config, ConfigError +def _dict_merge(merge_dict, into_dct): + for k, v in merge_dict.items(): + if k not in into_dct: + into_dct[k] = v + continue + + current_val = into_dct[k] + + if isinstance(v, dict) and isinstance(current_val, dict): + _dict_merge(v, current_val) + continue + + # otherwise we just overwrite + into_dct[k] = v + + class SAML2Config(Config): def read_config(self, config, **kwargs): self.saml2_enabled = False @@ -33,15 +52,18 @@ class SAML2Config(Config): self.saml2_enabled = True - import saml2.config - - self.saml2_sp_config = saml2.config.SPConfig() - self.saml2_sp_config.load(self._default_saml_config_dict()) - self.saml2_sp_config.load(saml2_config.get("sp_config", {})) + saml2_config_dict = self._default_saml_config_dict() + _dict_merge(saml2_config.get("sp_config", {}), saml2_config_dict) config_path = saml2_config.get("config_path", None) if config_path is not None: - self.saml2_sp_config.load_file(config_path) + mod = load_python_module(config_path) + _dict_merge(mod.CONFIG, saml2_config_dict) + + import saml2.config + + self.saml2_sp_config = saml2.config.SPConfig() + self.saml2_sp_config.load(saml2_config_dict) # session lifetime: in milliseconds self.saml2_session_lifetime = self.parse_duration( diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py index 522acd5aa8..7ff7eb1e4d 100644 --- a/synapse/util/module_loader.py +++ b/synapse/util/module_loader.py @@ -14,12 +14,13 @@ # limitations under the License. import importlib +import importlib.util from synapse.config._base import ConfigError def load_module(provider): - """ Loads a module with its config + """ Loads a synapse module with its config Take a dict with keys 'module' (the module name) and 'config' (the config dict). @@ -38,3 +39,20 @@ def load_module(provider): raise ConfigError("Failed to parse config for %r: %r" % (provider["module"], e)) return provider_class, provider_config + + +def load_python_module(location: str): + """Load a python module, and return a reference to its global namespace + + Args: + location (str): path to the module + + Returns: + python module object + """ + spec = importlib.util.spec_from_file_location(location, location) + if spec is None: + raise Exception("Unable to load module at %s" % (location,)) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod -- cgit 1.4.1 From 36015d68efccd2520ac0a569a5f8714544f6568c Mon Sep 17 00:00:00 2001 From: "J. Ryan Stinnett" Date: Thu, 19 Sep 2019 22:28:30 +0100 Subject: Use unstable prefix for 3PID unbind API (#6062) --- changelog.d/5980.feature | 2 +- changelog.d/6062.bugfix | 1 + synapse/rest/client/v2_alpha/account.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6062.bugfix (limited to 'synapse') diff --git a/changelog.d/5980.feature b/changelog.d/5980.feature index f25d8d81d9..e20117cf1c 100644 --- a/changelog.d/5980.feature +++ b/changelog.d/5980.feature @@ -1 +1 @@ -Add POST /_matrix/client/r0/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account. \ No newline at end of file +Add POST /_matrix/client/unstable/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account. 
diff --git a/changelog.d/6062.bugfix b/changelog.d/6062.bugfix new file mode 100644 index 0000000000..e20117cf1c --- /dev/null +++ b/changelog.d/6062.bugfix @@ -0,0 +1 @@ +Add POST /_matrix/client/unstable/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account. diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index afaaeeacdd..ce1487dbc5 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -529,7 +529,7 @@ class ThreepidRestServlet(RestServlet): class ThreepidUnbindRestServlet(RestServlet): - PATTERNS = client_patterns("/account/3pid/unbind$") + PATTERNS = client_patterns("/account/3pid/unbind$", releases=(), unstable=True) def __init__(self, hs): super(ThreepidUnbindRestServlet, self).__init__() -- cgit 1.4.1 From 3ac614eb6c294b7f77dde123f85ddaf3a389e3b8 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 20 Sep 2019 10:46:34 +0100 Subject: Drop support for bind param on POST /account/3pid (MSC2290) (#6067) As per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/files#diff-05cde9463e9209b701312b3baf2fb2ebR151), we're dropping the bind parameter from `/account/3pid`. This endpoint can now only be used for adding threepid's to the user's account on the homeserver. --- changelog.d/6067.feature | 1 + synapse/rest/client/v2_alpha/account.py | 4 ---- sytest-blacklist | 9 +++++++++ 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6067.feature (limited to 'synapse') diff --git a/changelog.d/6067.feature b/changelog.d/6067.feature new file mode 100644 index 0000000000..72685961c9 --- /dev/null +++ b/changelog.d/6067.feature @@ -0,0 +1 @@ +Remove `bind` parameter from Client Server POST `/account` endpoint as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/). 
\ No newline at end of file diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index ce1487dbc5..1791f4d79b 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -521,10 +521,6 @@ class ThreepidRestServlet(RestServlet): user_id, threepid["medium"], threepid["address"], threepid["validated_at"] ) - if "bind" in body and body["bind"]: - logger.debug("Binding threepid %s to %s", threepid, user_id) - yield self.identity_handler.bind_threepid(threepid_creds, user_id) - return 200, {} diff --git a/sytest-blacklist b/sytest-blacklist index 11785fd43f..04698cb068 100644 --- a/sytest-blacklist +++ b/sytest-blacklist @@ -29,3 +29,12 @@ Enabling an unknown default rule fails with 404 # Blacklisted due to https://github.com/matrix-org/synapse/issues/1663 New federated private chats get full presence information (SYN-115) + +# Blacklisted temporarily due to https://github.com/matrix-org/matrix-doc/pull/2290 +# These sytests need to be updated with new endpoints, which will come in a later PR +# That PR will also remove this blacklist +Can bind 3PID via home server +Can bind and unbind 3PID via homeserver +3PIDs are unbound after account deactivation +Can bind and unbind 3PID via /unbind by specifying the identity server +Can bind and unbind 3PID via /unbind without specifying the identity server -- cgit 1.4.1 From aeb40f355c8590855eeca05b49bfff2b91faa85b Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 20 Sep 2019 10:46:59 +0100 Subject: Ensure email validation link parameters are URL-encoded (#6063) The validation links sent via email had their query parameters inserted without any URL-encoding. Surprisingly this didn't seem to cause any issues, but if a user were to put a `/` in their client_secret it could lead to problems. --- changelog.d/6063.bugfix | 1 + synapse/push/mailer.py | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6063.bugfix (limited to 'synapse') diff --git a/changelog.d/6063.bugfix b/changelog.d/6063.bugfix new file mode 100644 index 0000000000..7485e32a2c --- /dev/null +++ b/changelog.d/6063.bugfix @@ -0,0 +1 @@ +Ensure query parameters in email validation links are URL-encoded. 
\ No newline at end of file diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 3dfd527849..2437235dc4 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -136,10 +136,11 @@ class Mailer(object): group together multiple email sending attempts sid (str): The generated session ID """ + params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( self.hs.config.public_baseurl - + "_matrix/client/unstable/password_reset/email/submit_token" - "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid) + + "_matrix/client/unstable/password_reset/email/submit_token?%s" + % urllib.parse.urlencode(params) ) template_vars = {"link": link} @@ -163,10 +164,11 @@ class Mailer(object): group together multiple email sending attempts sid (str): The generated session ID """ + params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( self.hs.config.public_baseurl - + "_matrix/client/unstable/registration/email/submit_token" - "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid) + + "_matrix/client/unstable/registration/email/submit_token?%s" + % urllib.parse.urlencode(params) ) template_vars = {"link": link} -- cgit 1.4.1 From 33757bad19a19adaef211a0d58fc369c68dfeb3c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Sep 2019 11:15:14 +0100 Subject: More better logging --- synapse/handlers/saml_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index f000d2a00f..cc9e6b9bd0 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -111,7 +111,8 @@ class SamlHandler: logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava) + logger.info("SAML2 response: %s", saml2_auth.origxml) + logger.info("SAML2 mapped attributes: %s", saml2_auth.ava) try: remote_user_id = saml2_auth.ava["uid"][0] -- cgit 1.4.1 From 9d94313209fdb2141189c927cb1f81fea6feb5e4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Sep 2019 12:05:00 +0100 Subject: Fix exception when resetting retry timings Fixes: > TypeError: set_destination_retry_timings() missing 1 required positional argument: 'retry_interval' Introduced in #6016. --- changelog.d/6072.misc | 1 + synapse/federation/transport/server.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6072.misc (limited to 'synapse') diff --git a/changelog.d/6072.misc b/changelog.d/6072.misc new file mode 100644 index 0000000000..91cf164714 --- /dev/null +++ b/changelog.d/6072.misc @@ -0,0 +1 @@ +Add a 'failure_ts' column to the 'destinations' database table. 
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 7dc696c7ae..7f8a16e355 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -165,7 +165,7 @@ class Authenticator(object): async def _reset_retry_timings(self, origin): try: logger.info("Marking origin %r as up", origin) - await self.store.set_destination_retry_timings(origin, 0, 0) + await self.store.set_destination_retry_timings(origin, None, 0, 0) except Exception: logger.exception("Error resetting retry timings on %s", origin) -- cgit 1.4.1 From 7763dd3e9592909cfe3d7763f4a68b8135fc2bdc Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 20 Sep 2019 14:58:37 +0100 Subject: Remove trailing slash ability from password reset's submit_token endpoint (#6074) Remove trailing slash ability from the password reset submit_token endpoint. Since we provide the link in an email, and have never sent it with a trailing slash, there's no point for us to accept them on the endpoint. --- changelog.d/6074.feature | 1 + synapse/rest/client/v2_alpha/account.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6074.feature (limited to 'synapse') diff --git a/changelog.d/6074.feature b/changelog.d/6074.feature new file mode 100644 index 0000000000..b7aa9c99d8 --- /dev/null +++ b/changelog.d/6074.feature @@ -0,0 +1 @@ +Prevent password reset's submit_token endpoint from accepting trailing slashes. \ No newline at end of file diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 1791f4d79b..3c5b23dc80 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -200,7 +200,7 @@ class PasswordResetSubmitTokenServlet(RestServlet): """Handles 3PID validation token submission""" PATTERNS = client_patterns( - "/password_reset/(?P[^/]*)/submit_token/*$", releases=(), unstable=True + "/password_reset/(?P[^/]*)/submit_token$", releases=(), unstable=True ) def __init__(self, hs): -- cgit 1.4.1 From df3401a71d78088da36a03c73d35bc116c712df6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 20 Sep 2019 15:21:30 +0100 Subject: Allow HS to send emails when adding an email to the HS (#6042) --- changelog.d/6042.feature | 1 + docs/sample_config.yaml | 12 ++ synapse/config/emailconfig.py | 36 ++++ synapse/handlers/identity.py | 17 +- synapse/push/mailer.py | 29 +++ synapse/res/templates/add_threepid.html | 9 + synapse/res/templates/add_threepid.txt | 6 + synapse/res/templates/add_threepid_failure.html | 8 + synapse/res/templates/add_threepid_success.html | 6 + synapse/rest/client/v2_alpha/account.py | 252 ++++++++++++++++++++---- synapse/rest/client/v2_alpha/register.py | 24 +-- synapse/storage/registration.py | 31 ++- 12 files changed, 359 insertions(+), 72 deletions(-) create mode 100644 changelog.d/6042.feature create mode 100644 synapse/res/templates/add_threepid.html create mode 100644 synapse/res/templates/add_threepid.txt create mode 100644 synapse/res/templates/add_threepid_failure.html create mode 100644 synapse/res/templates/add_threepid_success.html (limited to 'synapse') diff --git a/changelog.d/6042.feature b/changelog.d/6042.feature new file mode 100644 index 0000000000..a737760363 --- /dev/null +++ b/changelog.d/6042.feature @@ -0,0 +1 @@ +Allow homeserver to handle or delegate email validation when adding an email to a user's account. 
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 3e4edc6b0b..61d9f09a99 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1261,6 +1261,12 @@ password_config: # #registration_template_html: registration.html # #registration_template_text: registration.txt # +# # Templates for validation emails sent by the homeserver when adding an email to +# # your user account +# # +# #add_threepid_template_html: add_threepid.html +# #add_threepid_template_text: add_threepid.txt +# # # Templates for password reset success and failure pages that a user # # will see after attempting to reset their password # # @@ -1272,6 +1278,12 @@ password_config: # # # #registration_template_success_html: registration_success.html # #registration_template_failure_html: registration_failure.html +# +# # Templates for success and failure pages that a user will see after attempting +# # to add an email or phone to their account +# # +# #add_threepid_success_html: add_threepid_success.html +# #add_threepid_failure_html: add_threepid_failure.html #password_providers: diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index e5de768b0c..d9b43de660 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -169,12 +169,22 @@ class EmailConfig(Config): self.email_registration_template_text = email_config.get( "registration_template_text", "registration.txt" ) + self.email_add_threepid_template_html = email_config.get( + "add_threepid_template_html", "add_threepid.html" + ) + self.email_add_threepid_template_text = email_config.get( + "add_threepid_template_text", "add_threepid.txt" + ) + self.email_password_reset_template_failure_html = email_config.get( "password_reset_template_failure_html", "password_reset_failure.html" ) self.email_registration_template_failure_html = email_config.get( "registration_template_failure_html", "registration_failure.html" ) + self.email_add_threepid_template_failure_html = email_config.get( + "add_threepid_template_failure_html", "add_threepid_failure.html" + ) # These templates do not support any placeholder variables, so we # will read them from disk once during setup @@ -184,6 +194,9 @@ class EmailConfig(Config): email_registration_template_success_html = email_config.get( "registration_template_success_html", "registration_success.html" ) + email_add_threepid_template_success_html = email_config.get( + "add_threepid_template_success_html", "add_threepid_success.html" + ) # Check templates exist for f in [ @@ -191,9 +204,14 @@ class EmailConfig(Config): self.email_password_reset_template_text, self.email_registration_template_html, self.email_registration_template_text, + self.email_add_threepid_template_html, + self.email_add_threepid_template_text, self.email_password_reset_template_failure_html, + self.email_registration_template_failure_html, + self.email_add_threepid_template_failure_html, email_password_reset_template_success_html, email_registration_template_success_html, + email_add_threepid_template_success_html, ]: p = os.path.join(self.email_template_dir, f) if not os.path.isfile(p): @@ -212,6 +230,12 @@ class EmailConfig(Config): self.email_registration_template_success_html_content = self.read_file( filepath, "email.registration_template_success_html" ) + filepath = os.path.join( + self.email_template_dir, email_add_threepid_template_success_html + ) + self.email_add_threepid_template_success_html_content = self.read_file( + filepath, "email.add_threepid_template_success_html" + ) if 
self.email_enable_notifs: required = [ @@ -328,6 +352,12 @@ class EmailConfig(Config): # #registration_template_html: registration.html # #registration_template_text: registration.txt # + # # Templates for validation emails sent by the homeserver when adding an email to + # # your user account + # # + # #add_threepid_template_html: add_threepid.html + # #add_threepid_template_text: add_threepid.txt + # # # Templates for password reset success and failure pages that a user # # will see after attempting to reset their password # # @@ -339,6 +369,12 @@ class EmailConfig(Config): # # # #registration_template_success_html: registration_success.html # #registration_template_failure_html: registration_failure.html + # + # # Templates for success and failure pages that a user will see after attempting + # # to add an email or phone to their account + # # + # #add_threepid_success_html: add_threepid_success.html + # #add_threepid_failure_html: add_threepid_failure.html """ diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 512f38e5a6..156719e308 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -81,11 +81,10 @@ class IdentityHandler(BaseHandler): given identity server Args: - id_server (str|None): The identity server to validate 3PIDs against. If None, - we will attempt to extract id_server creds + id_server (str): The identity server to validate 3PIDs against. Must be a + complete URL including the protocol (http(s)://) creds (dict[str, str]): Dictionary containing the following keys: - * id_server|idServer: An optional domain name of an identity server * client_secret|clientSecret: A unique secret str provided by the client * sid: The ID of the validation session @@ -104,20 +103,10 @@ class IdentityHandler(BaseHandler): raise SynapseError( 400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM ) - if not id_server: - # Attempt to get the id_server from the creds dict - id_server = creds.get("id_server") or creds.get("idServer") - if not id_server: - raise SynapseError( - 400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM - ) query_params = {"sid": session_id, "client_secret": client_secret} - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/api/v1/3pid/getValidated3pid", - ) + url = id_server + "/_matrix/identity/api/v1/3pid/getValidated3pid" data = yield self.http_client.get_json(url, query_params) return data if "medium" in data else None diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 2437235dc4..5a4fc78b4c 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -179,6 +179,35 @@ class Mailer(object): template_vars, ) + @defer.inlineCallbacks + def send_add_threepid_mail(self, email_address, token, client_secret, sid): + """Send an email with a validation link to a user for adding a 3pid to their account + + Args: + email_address (str): Email address we're sending the validation link to + + token (str): Unique token generated by the server to verify the email was received + + client_secret (str): Unique token generated by the client to group together + multiple email sending attempts + + sid (str): The generated session ID + """ + params = {"token": token, "client_secret": client_secret, "sid": sid} + link = ( + self.hs.config.public_baseurl + + "_matrix/client/unstable/add_threepid/email/submit_token?%s" + % urllib.parse.urlencode(params) + ) + + template_vars = {"link": link} + + yield self.send_email( + email_address, + "[%s] Validate Your Email" % 
self.hs.config.server_name, + template_vars, + ) + @defer.inlineCallbacks def send_notification_mail( self, app_id, user_id, email_address, push_actions, reason diff --git a/synapse/res/templates/add_threepid.html b/synapse/res/templates/add_threepid.html new file mode 100644 index 0000000000..cc4ab07e09 --- /dev/null +++ b/synapse/res/templates/add_threepid.html @@ -0,0 +1,9 @@ + + +
+<p>A request to add an email address to your Matrix account has been received. If this was you, please click the link below to confirm adding this email:</p>
+
+<a href="{{ link }}">{{ link }}</a>
+
+<p>If this was not you, you can safely ignore this email. Thank you.</p>
+ + diff --git a/synapse/res/templates/add_threepid.txt b/synapse/res/templates/add_threepid.txt new file mode 100644 index 0000000000..a60c1ff659 --- /dev/null +++ b/synapse/res/templates/add_threepid.txt @@ -0,0 +1,6 @@ +A request to add an email address to your Matrix account has been received. If this was you, +please click the link below to confirm adding this email: + +{{ link }} + +If this was not you, you can safely ignore this email. Thank you. diff --git a/synapse/res/templates/add_threepid_failure.html b/synapse/res/templates/add_threepid_failure.html new file mode 100644 index 0000000000..441d11c846 --- /dev/null +++ b/synapse/res/templates/add_threepid_failure.html @@ -0,0 +1,8 @@ + + + +
+<p>The request failed for the following reason: {{ failure_reason }}.</p>
+
+<p>No changes have been made to your account.</p>
+ + diff --git a/synapse/res/templates/add_threepid_success.html b/synapse/res/templates/add_threepid_success.html new file mode 100644 index 0000000000..fbd6e4018f --- /dev/null +++ b/synapse/res/templates/add_threepid_success.html @@ -0,0 +1,6 @@ + + + +
+<p>Your email has now been validated, please return to your client. You may now close this window.</p>
+ + diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 3c5b23dc80..1139bb156c 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -21,7 +21,12 @@ from six.moves import http_client from twisted.internet import defer from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError, ThreepidValidationError +from synapse.api.errors import ( + Codes, + HttpResponseException, + SynapseError, + ThreepidValidationError, +) from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.server import finish_request from synapse.http.servlet import ( @@ -103,16 +108,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - # Have the configured identity server handle the request - if not self.hs.config.account_threepid_delegate_email: - logger.warn( - "No upstream email account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, "Password reset by email is not supported on this homeserver" - ) + assert self.hs.config.account_threepid_delegate_email + # Have the configured identity server handle the request ret = yield self.identity_handler.requestEmailToken( self.hs.config.account_threepid_delegate_email, email, @@ -214,6 +212,11 @@ class PasswordResetSubmitTokenServlet(RestServlet): self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_password_reset_template_failure_html], + ) @defer.inlineCallbacks def on_GET(self, request, medium): @@ -261,13 +264,8 @@ class PasswordResetSubmitTokenServlet(RestServlet): request.setResponseCode(e.code) # Show a failure page with a reason - html_template, = load_jinja2_templates( - self.config.email_template_dir, - [self.config.email_password_reset_template_failure_html], - ) - template_vars = {"failure_reason": e.msg} - html = html_template.render(**template_vars) + html = self.failure_email_template.render(**template_vars) request.write(html.encode("utf-8")) finish_request(request) @@ -399,13 +397,35 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.store = self.hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + template_html, template_text = load_jinja2_templates( + self.config.email_template_dir, + [ + self.config.email_add_threepid_template_html, + self.config.email_add_threepid_template_text, + ], + public_baseurl=self.config.public_baseurl, + ) + self.mailer = Mailer( + hs=self.hs, + app_name=self.config.email_app_name, + template_html=template_html, + template_text=template_text, + ) + @defer.inlineCallbacks def on_POST(self, request): + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: + logger.warn( + "Adding emails have been disabled due to lack of an email config" + ) + raise SynapseError( + 400, "Adding an email to your account is disabled on this server" + ) + body = parse_json_object_from_request(request) - assert_params_in_dict( - body, ["id_server", "client_secret", "email", "send_attempt"] - ) - id_server = 
"https://" + body["id_server"] # Assume https + assert_params_in_dict(body, ["client_secret", "email", "send_attempt"]) client_secret = body["client_secret"] email = body["email"] send_attempt = body["send_attempt"] @@ -425,9 +445,30 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): if existing_user_id is not None: raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - ret = yield self.identity_handler.requestEmailToken( - id_server, email, client_secret, send_attempt, next_link - ) + if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.account_threepid_delegate_email + + # Have the configured identity server handle the request + ret = yield self.identity_handler.requestEmailToken( + self.hs.config.account_threepid_delegate_email, + email, + client_secret, + send_attempt, + next_link, + ) + else: + # Send threepid validation emails from Synapse + sid = yield self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_add_threepid_mail, + next_link, + ) + + # Wrap the session id in a JSON object + ret = {"sid": sid} + return 200, ret @@ -471,9 +512,86 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): ret = yield self.identity_handler.requestMsisdnToken( id_server, country, phone_number, client_secret, send_attempt, next_link ) + return 200, ret +class AddThreepidSubmitTokenServlet(RestServlet): + """Handles 3PID validation token submission for adding an email to a user's account""" + + PATTERNS = client_patterns( + "/add_threepid/email/submit_token$", releases=(), unstable=True + ) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super().__init__() + self.config = hs.config + self.clock = hs.get_clock() + self.store = hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_add_threepid_template_failure_html], + ) + + @defer.inlineCallbacks + def on_GET(self, request): + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: + logger.warn( + "Adding emails have been disabled due to lack of an email config" + ) + raise SynapseError( + 400, "Adding an email to your account is disabled on this server" + ) + elif self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + raise SynapseError( + 400, + "This homeserver is not validating threepids. 
Use an identity server " + "instead.", + ) + + sid = parse_string(request, "sid", required=True) + client_secret = parse_string(request, "client_secret", required=True) + token = parse_string(request, "token", required=True) + + # Attempt to validate a 3PID session + try: + # Mark the session as valid + next_link = yield self.store.validate_threepid_session( + sid, client_secret, token, self.clock.time_msec() + ) + + # Perform a 302 redirect if next_link is set + if next_link: + if next_link.startswith("file:///"): + logger.warn( + "Not redirecting to next_link as it is a local file: address" + ) + else: + request.setResponseCode(302) + request.setHeader("Location", next_link) + finish_request(request) + return None + + # Otherwise show the success template + html = self.config.email_add_threepid_template_success_html_content + request.setResponseCode(200) + except ThreepidValidationError as e: + request.setResponseCode(e.code) + + # Show a failure page with a reason + template_vars = {"failure_reason": e.msg} + html = self.failure_email_template.render(**template_vars) + + request.write(html.encode("utf-8")) + finish_request(request) + + class ThreepidRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid$") @@ -495,6 +613,8 @@ class ThreepidRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() body = parse_json_object_from_request(request) threepid_creds = body.get("threePidCreds") or body.get("three_pid_creds") @@ -502,26 +622,85 @@ class ThreepidRestServlet(RestServlet): raise SynapseError( 400, "Missing param three_pid_creds", Codes.MISSING_PARAM ) + assert_params_in_dict(threepid_creds, ["client_secret", "sid"]) - requester = yield self.auth.get_user_by_req(request) - user_id = requester.user.to_string() + client_secret = threepid_creds["client_secret"] + sid = threepid_creds["sid"] - # Specify None as the identity server to retrieve it from the request body instead - threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds) + # We don't actually know which medium this 3PID is. Thus we first assume it's email, + # and if validation fails we try msisdn + validation_session = None - if not threepid: - raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED) + # Try to validate as email + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + # Ask our delegated email identity server + try: + validation_session = yield self.identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + except HttpResponseException: + logger.debug( + "%s reported non-validated threepid: %s", + self.hs.config.account_threepid_delegate_email, + threepid_creds, + ) + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + # Get a validated session matching these details + validation_session = yield self.datastore.get_threepid_validation_session( + "email", client_secret, sid=sid, validated=True + ) - for reqd in ["medium", "address", "validated_at"]: - if reqd not in threepid: - logger.warn("Couldn't add 3pid: invalid response from ID server") - raise SynapseError(500, "Invalid response from ID Server") + # Old versions of Sydent return a 200 http code even on a failed validation check. 
+ # Thus, in addition to the HttpResponseException check above (which checks for + # non-200 errors), we need to make sure validation_session isn't actually an error, + # identified by containing an "error" key + # See https://github.com/matrix-org/sydent/issues/215 for details + if validation_session and "error" not in validation_session: + yield self._add_threepid_to_account(user_id, validation_session) + return 200, {} - yield self.auth_handler.add_threepid( - user_id, threepid["medium"], threepid["address"], threepid["validated_at"] + # Try to validate as msisdn + if self.hs.config.account_threepid_delegate_msisdn: + # Ask our delegated msisdn identity server + try: + validation_session = yield self.identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + except HttpResponseException: + logger.debug( + "%s reported non-validated threepid: %s", + self.hs.config.account_threepid_delegate_email, + threepid_creds, + ) + + # Check that validation_session isn't actually an error due to old Sydent instances + # See explanatory comment above + if validation_session and "error" not in validation_session: + yield self._add_threepid_to_account(user_id, validation_session) + return 200, {} + + raise SynapseError( + 400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED ) - return 200, {} + @defer.inlineCallbacks + def _add_threepid_to_account(self, user_id, validation_session): + """Add a threepid wrapped in a validation_session dict to an account + + Args: + user_id (str): The mxid of the user to add this 3PID to + + validation_session (dict): A dict containing the following: + * medium - medium of the threepid + * address - address of the threepid + * validated_at - timestamp of when the validation occurred + """ + yield self.auth_handler.add_threepid( + user_id, + validation_session["medium"], + validation_session["address"], + validation_session["validated_at"], + ) class ThreepidUnbindRestServlet(RestServlet): @@ -613,6 +792,7 @@ def register_servlets(hs, http_server): DeactivateAccountRestServlet(hs).register(http_server) EmailThreepidRequestTokenRestServlet(hs).register(http_server) MsisdnThreepidRequestTokenRestServlet(hs).register(http_server) + AddThreepidSubmitTokenServlet(hs).register(http_server) ThreepidRestServlet(hs).register(http_server) ThreepidUnbindRestServlet(hs).register(http_server) ThreepidDeleteRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 5c7a5f3579..34276ea3fa 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -131,15 +131,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - if not self.hs.config.account_threepid_delegate_email: - logger.warn( - "No upstream email account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, "Registration by email is not supported on this homeserver" - ) + assert self.hs.config.account_threepid_delegate_email + # Have the configured identity server handle the request ret = yield self.identity_handler.requestEmailToken( self.hs.config.account_threepid_delegate_email, email, @@ -246,6 +240,12 @@ class RegistrationSubmitTokenServlet(RestServlet): self.clock = hs.get_clock() self.store = hs.get_datastore() + if 
self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_registration_template_failure_html], + ) + @defer.inlineCallbacks def on_GET(self, request, medium): if medium != "email": @@ -289,17 +289,11 @@ class RegistrationSubmitTokenServlet(RestServlet): request.setResponseCode(200) except ThreepidValidationError as e: - # Show a failure page with a reason request.setResponseCode(e.code) # Show a failure page with a reason - html_template, = load_jinja2_templates( - self.config.email_template_dir, - [self.config.email_registration_template_failure_html], - ) - template_vars = {"failure_reason": e.msg} - html = html_template.render(**template_vars) + html = self.failure_email_template.render(**template_vars) request.write(html.encode("utf-8")) finish_request(request) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 109052fa41..da27ad76b6 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -24,7 +24,7 @@ from six.moves import range from twisted.internet import defer from synapse.api.constants import UserTypes -from synapse.api.errors import Codes, StoreError, ThreepidValidationError +from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage import background_updates from synapse.storage._base import SQLBaseStore @@ -661,18 +661,31 @@ class RegistrationWorkerStore(SQLBaseStore): medium (str|None): The medium of the 3PID address (str|None): The address of the 3PID sid (str|None): The ID of the validation session - client_secret (str|None): A unique string provided by the client to - help identify this validation attempt + client_secret (str): A unique string provided by the client to help identify this + validation attempt validated (bool|None): Whether sessions should be filtered by whether they have been validated already or not. None to perform no filtering Returns: - deferred {str, int}|None: A dict containing the - latest session_id and send_attempt count for this 3PID. - Otherwise None if there hasn't been a previous attempt + Deferred[dict|None]: A dict containing the following: + * address - address of the 3pid + * medium - medium of the 3pid + * client_secret - a secret provided by the client for this validation session + * session_id - ID of the validation session + * send_attempt - a number serving to dedupe send attempts for this session + * validated_at - timestamp of when this session was validated if so + + Otherwise None if a validation session is not found """ - keyvalues = {"medium": medium, "client_secret": client_secret} + if not client_secret: + raise SynapseError( + 400, "Missing parameter: client_secret", errcode=Codes.MISSING_PARAM + ) + + keyvalues = {"client_secret": client_secret} + if medium: + keyvalues["medium"] = medium if address: keyvalues["address"] = address if sid: @@ -1209,6 +1222,10 @@ class RegistrationStore( current_ts (int): The current unix time in milliseconds. Used for checking token expiry status + Raises: + ThreepidValidationError: if a matching validation token was not found or has + expired + Returns: deferred str|None: A str representing a link to redirect the user to if there is one. 
-- cgit 1.4.1 From 7ef319aefe6a44ee0bdfa4bdc4c05ce8ef96cb18 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Sep 2019 12:28:01 +0100 Subject: fix broken copyrights --- synapse/config/repository.py | 2 +- synapse/config/workers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 34f1a9a92d..52e014608a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 matrix.org +# Copyright 2014, 2015 OpenMarket Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/config/workers.py b/synapse/config/workers.py index bc0fc165e3..1ec4998625 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2016 matrix.org +# Copyright 2016 OpenMarket Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -- cgit 1.4.1 From 885a4726b7f9cdf02187b92b43f639e2cbfbb12e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 15:37:23 +0200 Subject: Return timeout error to user for identity server calls (#6073) --- changelog.d/6073.feature | 1 + synapse/handlers/identity.py | 16 ++++++++++++++-- synapse/handlers/room_member.py | 32 +++++++++++++++++++++++--------- 3 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 changelog.d/6073.feature (limited to 'synapse') diff --git a/changelog.d/6073.feature b/changelog.d/6073.feature new file mode 100644 index 0000000000..15d9933891 --- /dev/null +++ b/changelog.d/6073.feature @@ -0,0 +1 @@ +Return a clearer error message when a timeout occurs when attempting to contact an identity server. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 156719e308..cd4700b521 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -22,6 +22,7 @@ import logging from canonicaljson import json from twisted.internet import defer +from twisted.internet.error import TimeoutError from synapse.api.errors import ( CodeMessageException, @@ -108,7 +109,10 @@ class IdentityHandler(BaseHandler): url = id_server + "/_matrix/identity/api/v1/3pid/getValidated3pid" - data = yield self.http_client.get_json(url, query_params) + try: + data = yield self.http_client.get_json(url, query_params) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") return data if "medium" in data else None @defer.inlineCallbacks @@ -171,6 +175,8 @@ class IdentityHandler(BaseHandler): if e.code != 404 or not use_v2: logger.error("3PID bind failed with Matrix error: %r", e) raise e.to_synapse_error() + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except CodeMessageException as e: data = json.loads(e.msg) # XXX WAT? 
return data @@ -261,7 +267,9 @@ class IdentityHandler(BaseHandler): logger.warn("Received %d response while unbinding threepid", e.code) else: logger.error("Failed to unbind threepid on identity server: %s", e) - raise SynapseError(502, "Failed to contact identity server") + raise SynapseError(500, "Failed to contact identity server") + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") yield self.store.remove_user_bound_threepid( user_id=mxid, @@ -394,6 +402,8 @@ class IdentityHandler(BaseHandler): except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") @defer.inlineCallbacks def requestMsisdnToken( @@ -446,6 +456,8 @@ class IdentityHandler(BaseHandler): except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") def create_id_access_token_header(id_access_token): diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 35450feb6f..39df0f128d 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -25,6 +25,7 @@ from signedjson.sign import verify_signed_json from unpaddedbase64 import decode_base64 from twisted.internet import defer +from twisted.internet.error import TimeoutError from synapse import types from synapse.api.constants import EventTypes, Membership @@ -756,7 +757,8 @@ class RoomMemberHandler(object): raise AuthError(401, "No signatures on 3pid binding") yield self._verify_any_signature(data, id_server) return data["mxid"] - + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except IOError as e: logger.warning("Error from v1 identity server lookup: %s" % (e,)) @@ -777,10 +779,13 @@ class RoomMemberHandler(object): Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. 
""" # Check what hashing details are supported by this identity server - hash_details = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), - {"access_token": id_access_token}, - ) + try: + hash_details = yield self.simple_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), + {"access_token": id_access_token}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") if not isinstance(hash_details, dict): logger.warning( @@ -851,6 +856,8 @@ class RoomMemberHandler(object): }, headers=headers, ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except Exception as e: logger.warning("Error when performing a v2 3pid lookup: %s", e) raise SynapseError( @@ -873,10 +880,13 @@ class RoomMemberHandler(object): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): - key_data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/pubkey/%s" - % (id_server_scheme, server_hostname, key_name) - ) + try: + key_data = yield self.simple_http_client.get_json( + "%s%s/_matrix/identity/api/v1/pubkey/%s" + % (id_server_scheme, server_hostname, key_name) + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") if "public_key" not in key_data: raise AuthError( 401, "No public key named %s from %s" % (key_name, server_hostname) @@ -1051,6 +1061,8 @@ class RoomMemberHandler(object): invite_config, {"Authorization": create_id_access_token_header(id_access_token)}, ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: if e.code != 404: logger.info("Failed to POST %s with JSON: %s", url, e) @@ -1067,6 +1079,8 @@ class RoomMemberHandler(object): data = yield self.simple_http_client.post_json_get_json( url, invite_config ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.warning( "Error trying to call /store-invite on %s%s: %s", -- cgit 1.4.1 From 1c9feadf4bf0755162d0d210bea398a3fb690ab6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 15:38:19 +0200 Subject: Generalize email sending logging (#6075) In ancient times Synapse would only send emails when it was notifying a user about a message they received... Now it can do all sorts of neat things! Change the logging so it's not just about notifications. --- changelog.d/6075.misc | 1 + synapse/push/mailer.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6075.misc (limited to 'synapse') diff --git a/changelog.d/6075.misc b/changelog.d/6075.misc new file mode 100644 index 0000000000..914e56bcfe --- /dev/null +++ b/changelog.d/6075.misc @@ -0,0 +1 @@ +Change mailer logging to reflect Synapse doesn't just do chat notifications by email now. 
\ No newline at end of file diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 5a4fc78b4c..5b16ab4ae8 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -311,7 +311,7 @@ class Mailer(object): multipart_msg.attach(text_part) multipart_msg.attach(html_part) - logger.info("Sending email notification to %s" % email_address) + logger.info("Sending email to %s" % email_address) yield make_deferred_yieldable( self.sendmail( -- cgit 1.4.1 From 1b519e0272a13649d442aad2a10c9a3b39c2d200 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 16:38:38 +0200 Subject: Disable /register/available if registration is disabled (#6082) Fixes #6066 This register endpoint should be disabled if registration is disabled, otherwise we're giving anyone the ability to check if a username exists on a server when we don't need to be. Error code is 403 (Forbidden) as that's the same returned by /register when registration is disabled. --- changelog.d/6082.feature | 1 + synapse/rest/client/v2_alpha/register.py | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog.d/6082.feature (limited to 'synapse') diff --git a/changelog.d/6082.feature b/changelog.d/6082.feature new file mode 100644 index 0000000000..c30662b608 --- /dev/null +++ b/changelog.d/6082.feature @@ -0,0 +1 @@ +Return 403 on `/register/available` if registration has been disabled. \ No newline at end of file diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 34276ea3fa..e99b1f5c45 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -328,6 +328,11 @@ class UsernameAvailabilityRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request): + if not self.hs.config.enable_registration: + raise SynapseError( + 403, "Registration has been disabled", errcode=Codes.FORBIDDEN + ) + ip = self.hs.get_ip_from_request(request) with self.ratelimiter.ratelimit(ip) as wait_deferred: yield wait_deferred -- cgit 1.4.1 From 30af161af27146cc44152292060c7005a6b8546b Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 17:50:27 +0200 Subject: Implement MSC2290 (#6043) Implements MSC2290. This PR adds two new endpoints, /unstable/account/3pid/add and /unstable/account/3pid/bind. Depending on the progress of that MSC the unstable prefix may go away. This PR also removes the blacklist on some 3PID tests which occurs in #6042, as the corresponding Sytest PR changes them to use the new endpoints. Finally, it also modifies the account deactivation code such that it doesn't just try to deactivate 3PIDs that were bound to the user's account, but any 3PIDs that were bound through the homeserver on that user's account. 
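(Illustrative aside, not part of the patch: a minimal client-side sketch of the two new unstable endpoints this change introduces. The homeserver URL, access token, identity server and session values below are placeholders, the Python `requests` library is an assumption, and the paths and body fields follow the servlet definitions in the diff that follows.)

# Hypothetical client-side sketch of the MSC2290 add/bind split (not part of the patch).
# Assumes a running homeserver at HS_URL, a valid ACCESS_TOKEN, and an existing threepid
# validation session identified by SID/CLIENT_SECRET.
import requests

HS_URL = "https://matrix.example.org"      # placeholder homeserver
ACCESS_TOKEN = "MDAx...accesstoken"        # placeholder access token
SID = "1234"                               # placeholder validation session ID
CLIENT_SECRET = "d0nttellanyone"           # placeholder secret used when the session was created

headers = {"Authorization": "Bearer %s" % ACCESS_TOKEN}

# 1. Add the validated threepid to the local account only (no identity-server bind).
resp = requests.post(
    HS_URL + "/_matrix/client/unstable/account/3pid/add",
    json={"sid": SID, "client_secret": CLIENT_SECRET},
    headers=headers,
)
print(resp.status_code, resp.json())

# 2. Separately bind the threepid to an identity server; id_access_token is optional
#    and only needed when talking to a v2 identity server.
resp = requests.post(
    HS_URL + "/_matrix/client/unstable/account/3pid/bind",
    json={
        "id_server": "id.example.org",     # placeholder identity server
        "id_access_token": "abc123",       # placeholder identity-server token
        "sid": SID,
        "client_secret": CLIENT_SECRET,
    },
    headers=headers,
)
print(resp.status_code, resp.json())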
--- changelog.d/6043.feature | 1 + synapse/handlers/deactivate_account.py | 4 +- synapse/handlers/identity.py | 134 +++++++++++++++---------- synapse/rest/client/v2_alpha/account.py | 161 +++++++++++++++++-------------- synapse/rest/client/v2_alpha/register.py | 6 ++ synapse/storage/registration.py | 22 ++++- sytest-blacklist | 9 -- 7 files changed, 203 insertions(+), 134 deletions(-) create mode 100644 changelog.d/6043.feature (limited to 'synapse') diff --git a/changelog.d/6043.feature b/changelog.d/6043.feature new file mode 100644 index 0000000000..cd27b0400b --- /dev/null +++ b/changelog.d/6043.feature @@ -0,0 +1 @@ +Implement new Client Server API endpoints `/account/3pid/add` and `/account/3pid/bind` as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290). \ No newline at end of file diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 5f804d1f13..d83912c9a4 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -73,7 +73,9 @@ class DeactivateAccountHandler(BaseHandler): # unbinding identity_server_supports_unbinding = True - threepids = yield self.store.user_get_threepids(user_id) + # Retrieve the 3PIDs this user has bound to an identity server + threepids = yield self.store.user_get_bound_threepids(user_id) + for threepid in threepids: try: result = yield self._identity_handler.try_unbind_threepid( diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index cd4700b521..d50d485e06 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -30,6 +30,7 @@ from synapse.api.errors import ( HttpResponseException, SynapseError, ) +from synapse.config.emailconfig import ThreepidBehaviour from synapse.util.stringutils import random_string from ._base import BaseHandler @@ -45,36 +46,6 @@ class IdentityHandler(BaseHandler): self.federation_http_client = hs.get_http_client() self.hs = hs - def _extract_items_from_creds_dict(self, creds): - """ - Retrieve entries from a "credentials" dictionary - - Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: - * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. - - Returns: - tuple(str, str, str|None): A tuple containing the client_secret, the id_server, - and the id_access_token value if available. 
- """ - client_secret = creds.get("client_secret") or creds.get("clientSecret") - if not client_secret: - raise SynapseError( - 400, "No client_secret in creds", errcode=Codes.MISSING_PARAM - ) - - id_server = creds.get("id_server") or creds.get("idServer") - if not id_server: - raise SynapseError( - 400, "No id_server in creds", errcode=Codes.MISSING_PARAM - ) - - id_access_token = creds.get("id_access_token") - return client_secret, id_server, id_access_token - @defer.inlineCallbacks def threepid_from_creds(self, id_server, creds): """ @@ -113,35 +84,50 @@ class IdentityHandler(BaseHandler): data = yield self.http_client.get_json(url, query_params) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") - return data if "medium" in data else None + except HttpResponseException as e: + logger.info( + "%s returned %i for threepid validation for: %s", + id_server, + e.code, + creds, + ) + return None + + # Old versions of Sydent return a 200 http code even on a failed validation + # check. Thus, in addition to the HttpResponseException check above (which + # checks for non-200 errors), we need to make sure validation_session isn't + # actually an error, identified by the absence of a "medium" key + # See https://github.com/matrix-org/sydent/issues/215 for details + if "medium" in data: + return data + + logger.info("%s reported non-validated threepid: %s", id_server, creds) + return None @defer.inlineCallbacks - def bind_threepid(self, creds, mxid, use_v2=True): + def bind_threepid( + self, client_secret, sid, mxid, id_server, id_access_token=None, use_v2=True + ): """Bind a 3PID to an identity server Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: - * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. Required if use_v2 is true + client_secret (str): A unique secret provided by the client + + sid (str): The ID of the validation session + mxid (str): The MXID to bind the 3PID to - use_v2 (bool): Whether to use v2 Identity Service API endpoints + + id_server (str): The domain of the identity server to query + + id_access_token (str): The access token to authenticate to the identity + server with, if necessary. Required if use_v2 is true + + use_v2 (bool): Whether to use v2 Identity Service API endpoints. 
Defaults to True Returns: Deferred[dict]: The response from the identity server """ - logger.debug("binding threepid %r to %s", creds, mxid) - - client_secret, id_server, id_access_token = self._extract_items_from_creds_dict( - creds - ) - - sid = creds.get("sid") - if not sid: - raise SynapseError( - 400, "No sid in three_pid_creds", errcode=Codes.MISSING_PARAM - ) + logger.debug("Proxying threepid bind request for %s to %s", mxid, id_server) # If an id_access_token is not supplied, force usage of v1 if id_access_token is None: @@ -160,7 +146,6 @@ class IdentityHandler(BaseHandler): data = yield self.http_client.post_json_get_json( bind_url, bind_data, headers=headers ) - logger.debug("bound threepid %r to %s", creds, mxid) # Remember where we bound the threepid yield self.store.add_user_bound_threepid( @@ -182,7 +167,10 @@ class IdentityHandler(BaseHandler): return data logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url) - return (yield self.bind_threepid(creds, mxid, use_v2=False)) + res = yield self.bind_threepid( + client_secret, sid, mxid, id_server, id_access_token, use_v2=False + ) + return res @defer.inlineCallbacks def try_unbind_threepid(self, mxid, threepid): @@ -459,6 +447,50 @@ class IdentityHandler(BaseHandler): except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") + @defer.inlineCallbacks + def validate_threepid_session(self, client_secret, sid): + """Validates a threepid session with only the client secret and session ID + Tries validating against any configured account_threepid_delegates as well as locally. + + Args: + client_secret (str): A secret provided by the client + + sid (str): The ID of the session + + Returns: + Dict[str, str|int] if validation was successful, otherwise None + """ + # XXX: We shouldn't need to keep wrapping and unwrapping this value + threepid_creds = {"client_secret": client_secret, "sid": sid} + + # We don't actually know which medium this 3PID is. 
Thus we first assume it's email, + # and if validation fails we try msisdn + validation_session = None + + # Try to validate as email + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + # Ask our delegated email identity server + validation_session = yield self.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + # Get a validated session matching these details + validation_session = yield self.store.get_threepid_validation_session( + "email", client_secret, sid=sid, validated=True + ) + + if validation_session: + return validation_session + + # Try to validate as msisdn + if self.hs.config.account_threepid_delegate_msisdn: + # Ask our delegated msisdn identity server + validation_session = yield self.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + + return validation_session + def create_id_access_token_header(id_access_token): """Create an Authorization header for passing to SimpleHttpClient as the header value diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 1139bb156c..b8c48dc8f1 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -21,12 +21,7 @@ from six.moves import http_client from twisted.internet import defer from synapse.api.constants import LoginType -from synapse.api.errors import ( - Codes, - HttpResponseException, - SynapseError, - ThreepidValidationError, -) +from synapse.api.errors import Codes, SynapseError, ThreepidValidationError from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.server import finish_request from synapse.http.servlet import ( @@ -485,10 +480,8 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): def on_POST(self, request): body = parse_json_object_from_request(request) assert_params_in_dict( - body, - ["id_server", "client_secret", "country", "phone_number", "send_attempt"], + body, ["client_secret", "country", "phone_number", "send_attempt"] ) - id_server = "https://" + body["id_server"] # Assume https client_secret = body["client_secret"] country = body["country"] phone_number = body["phone_number"] @@ -509,8 +502,23 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): if existing_user_id is not None: raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE) + if not self.hs.config.account_threepid_delegate_msisdn: + logger.warn( + "No upstream msisdn account_threepid_delegate configured on the server to " + "handle this request" + ) + raise SynapseError( + 400, + "Adding phone numbers to user account is not supported by this homeserver", + ) + ret = yield self.identity_handler.requestMsisdnToken( - id_server, country, phone_number, client_secret, send_attempt, next_link + self.hs.config.account_threepid_delegate_msisdn, + country, + phone_number, + client_secret, + send_attempt, + next_link, ) return 200, ret @@ -627,81 +635,88 @@ class ThreepidRestServlet(RestServlet): client_secret = threepid_creds["client_secret"] sid = threepid_creds["sid"] - # We don't actually know which medium this 3PID is. 
Thus we first assume it's email, - # and if validation fails we try msisdn - validation_session = None - - # Try to validate as email - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - # Ask our delegated email identity server - try: - validation_session = yield self.identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_email, threepid_creds - ) - except HttpResponseException: - logger.debug( - "%s reported non-validated threepid: %s", - self.hs.config.account_threepid_delegate_email, - threepid_creds, - ) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: - # Get a validated session matching these details - validation_session = yield self.datastore.get_threepid_validation_session( - "email", client_secret, sid=sid, validated=True - ) - - # Old versions of Sydent return a 200 http code even on a failed validation check. - # Thus, in addition to the HttpResponseException check above (which checks for - # non-200 errors), we need to make sure validation_session isn't actually an error, - # identified by containing an "error" key - # See https://github.com/matrix-org/sydent/issues/215 for details - if validation_session and "error" not in validation_session: - yield self._add_threepid_to_account(user_id, validation_session) + validation_session = yield self.identity_handler.validate_threepid_session( + client_secret, sid + ) + if validation_session: + yield self.auth_handler.add_threepid( + user_id, + validation_session["medium"], + validation_session["address"], + validation_session["validated_at"], + ) return 200, {} - # Try to validate as msisdn - if self.hs.config.account_threepid_delegate_msisdn: - # Ask our delegated msisdn identity server - try: - validation_session = yield self.identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_msisdn, threepid_creds - ) - except HttpResponseException: - logger.debug( - "%s reported non-validated threepid: %s", - self.hs.config.account_threepid_delegate_email, - threepid_creds, - ) + raise SynapseError( + 400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED + ) + - # Check that validation_session isn't actually an error due to old Sydent instances - # See explanatory comment above - if validation_session and "error" not in validation_session: - yield self._add_threepid_to_account(user_id, validation_session) - return 200, {} +class ThreepidAddRestServlet(RestServlet): + PATTERNS = client_patterns("/account/3pid/add$", releases=(), unstable=True) + + def __init__(self, hs): + super(ThreepidAddRestServlet, self).__init__() + self.hs = hs + self.identity_handler = hs.get_handlers().identity_handler + self.auth = hs.get_auth() + self.auth_handler = hs.get_auth_handler() + + @defer.inlineCallbacks + def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() + body = parse_json_object_from_request(request) + + assert_params_in_dict(body, ["client_secret", "sid"]) + client_secret = body["client_secret"] + sid = body["sid"] + + validation_session = yield self.identity_handler.validate_threepid_session( + client_secret, sid + ) + if validation_session: + yield self.auth_handler.add_threepid( + user_id, + validation_session["medium"], + validation_session["address"], + validation_session["validated_at"], + ) + return 200, {} raise SynapseError( 400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED ) + +class ThreepidBindRestServlet(RestServlet): + PATTERNS = 
client_patterns("/account/3pid/bind$", releases=(), unstable=True) + + def __init__(self, hs): + super(ThreepidBindRestServlet, self).__init__() + self.hs = hs + self.identity_handler = hs.get_handlers().identity_handler + self.auth = hs.get_auth() + @defer.inlineCallbacks - def _add_threepid_to_account(self, user_id, validation_session): - """Add a threepid wrapped in a validation_session dict to an account + def on_POST(self, request): + body = parse_json_object_from_request(request) - Args: - user_id (str): The mxid of the user to add this 3PID to + assert_params_in_dict(body, ["id_server", "sid", "client_secret"]) + id_server = body["id_server"] + sid = body["sid"] + client_secret = body["client_secret"] + id_access_token = body.get("id_access_token") # optional - validation_session (dict): A dict containing the following: - * medium - medium of the threepid - * address - address of the threepid - * validated_at - timestamp of when the validation occurred - """ - yield self.auth_handler.add_threepid( - user_id, - validation_session["medium"], - validation_session["address"], - validation_session["validated_at"], + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() + + yield self.identity_handler.bind_threepid( + client_secret, sid, user_id, id_server, id_access_token ) + return 200, {} + class ThreepidUnbindRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/unbind$", releases=(), unstable=True) @@ -794,6 +809,8 @@ def register_servlets(hs, http_server): MsisdnThreepidRequestTokenRestServlet(hs).register(http_server) AddThreepidSubmitTokenServlet(hs).register(http_server) ThreepidRestServlet(hs).register(http_server) + ThreepidAddRestServlet(hs).register(http_server) + ThreepidBindRestServlet(hs).register(http_server) ThreepidUnbindRestServlet(hs).register(http_server) ThreepidDeleteRestServlet(hs).register(http_server) WhoamiRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index e99b1f5c45..135a70808f 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -246,6 +246,12 @@ class RegistrationSubmitTokenServlet(RestServlet): [self.config.email_registration_template_failure_html], ) + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_registration_template_failure_html], + ) + @defer.inlineCallbacks def on_GET(self, request, medium): if medium != "email": diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index da27ad76b6..805411a6b2 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -586,6 +586,26 @@ class RegistrationWorkerStore(SQLBaseStore): desc="add_user_bound_threepid", ) + def user_get_bound_threepids(self, user_id): + """Get the threepids that a user has bound to an identity server through the homeserver + The homeserver remembers where binds to an identity server occurred. Using this + method can retrieve those threepids. 
+ + Args: + user_id (str): The ID of the user to retrieve threepids for + + Returns: + Deferred[list[dict]]: List of dictionaries containing the following: + medium (str): The medium of the threepid (e.g "email") + address (str): The address of the threepid (e.g "bob@example.com") + """ + return self._simple_select_list( + table="user_threepid_id_server", + keyvalues={"user_id": user_id}, + retcols=["medium", "address"], + desc="user_get_bound_threepids", + ) + def remove_user_bound_threepid(self, user_id, medium, address, id_server): """The server proxied an unbind request to the given identity server on behalf of the given user, so we remove the mapping of threepid to @@ -655,7 +675,7 @@ class RegistrationWorkerStore(SQLBaseStore): self, medium, client_secret, address=None, sid=None, validated=True ): """Gets a session_id and last_send_attempt (if available) for a - client_secret/medium/(address|session_id) combo + combination of validation metadata Args: medium (str|None): The medium of the 3PID diff --git a/sytest-blacklist b/sytest-blacklist index 04698cb068..11785fd43f 100644 --- a/sytest-blacklist +++ b/sytest-blacklist @@ -29,12 +29,3 @@ Enabling an unknown default rule fails with 404 # Blacklisted due to https://github.com/matrix-org/synapse/issues/1663 New federated private chats get full presence information (SYN-115) - -# Blacklisted temporarily due to https://github.com/matrix-org/matrix-doc/pull/2290 -# These sytests need to be updated with new endpoints, which will come in a later PR -# That PR will also remove this blacklist -Can bind 3PID via home server -Can bind and unbind 3PID via homeserver -3PIDs are unbound after account deactivation -Can bind and unbind 3PID via /unbind by specifying the identity server -Can bind and unbind 3PID via /unbind without specifying the identity server -- cgit 1.4.1 From 2ade05dca3d6da67e35c3a8ccdd278221f2566ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 14:16:10 +0100 Subject: Add last seen info to devices table. This allows us to purge old user_ips entries without having to preserve the latest last seen info for active devices. --- synapse/storage/client_ips.py | 15 +++++++++++++++ .../storage/schema/delta/56/devices_last_seen.sql | 21 +++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 synapse/storage/schema/delta/56/devices_last_seen.sql (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 6db8c54077..4db2e7f481 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -354,6 +354,21 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): }, lock=False, ) + + # Technically an access token might not be associated with + # a device so we need to check. 
+ if device_id: + self._simple_upsert_txn( + txn, + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + values={ + "user_agent": user_agent, + "last_seen": last_seen, + "ip": ip, + }, + lock=False, + ) except Exception as e: # Failed to upsert, log and continue logger.error("Failed to insert client IP %r: %r", entry, e) diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql new file mode 100644 index 0000000000..8818eeeb7e --- /dev/null +++ b/synapse/storage/schema/delta/56/devices_last_seen.sql @@ -0,0 +1,21 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Track last seen information for a device in the devices table, rather +-- than relying on it being in the user_ips table (which we want to be able +-- to purge old entries from) +ALTER TABLE devices ADD COLUMN last_seen BIGINT; +ALTER TABLE devices ADD COLUMN ip TEXT; +ALTER TABLE devices ADD COLUMN user_agent TEXT; -- cgit 1.4.1 From ed80231ade20ce7881bb2026692fe3a6252f1c02 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 15:59:43 +0100 Subject: Add BG update to populate devices last seen info --- synapse/storage/client_ips.py | 52 ++++++++++++++++++++++ .../storage/schema/delta/56/devices_last_seen.sql | 3 ++ 2 files changed, 55 insertions(+) (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 4db2e7f481..8839562269 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -85,6 +85,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_ips_drop_nonunique_index", self._remove_user_ip_nonunique ) + # Update the last seen info in devices. + self.register_background_update_handler( + "devices_last_seen", self._devices_last_seen_update + ) + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) self._batch_row_update = {} @@ -485,3 +490,50 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } for (access_token, ip), (user_agent, last_seen) in iteritems(results) ) + + @defer.inlineCallbacks + def _devices_last_seen_update(self, progress, batch_size): + """Background update to insert last seen info into devices table + """ + + last_user_id = progress.get("last_user_id", "") + last_device_id = progress.get("last_device_id", "") + + def _devices_last_seen_update_txn(txn): + sql = """ + SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE user_id > ? OR (user_id = ? AND device_id > ?) + ORDER BY user_id ASC, device_id ASC + LIMIT ? + """ + txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + sql = """ + UPDATE devices + SET last_seen = ?, ip = ?, user_agent = ? + WHERE user_id = ? AND device_id = ? 
+ """ + txn.execute_batch(sql, rows) + + _, _, _, user_id, device_id = rows[-1] + self._background_update_progress_txn( + txn, + "devices_last_seen", + {"last_user_id": user_id, "last_device_id": device_id}, + ) + + return len(rows) + + updated = yield self.runInteraction( + "_devices_last_seen_update", _devices_last_seen_update_txn + ) + + if not updated: + yield self._end_background_update("devices_last_seen") + + return updated diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql index 8818eeeb7e..dfa902d0ba 100644 --- a/synapse/storage/schema/delta/56/devices_last_seen.sql +++ b/synapse/storage/schema/delta/56/devices_last_seen.sql @@ -19,3 +19,6 @@ ALTER TABLE devices ADD COLUMN last_seen BIGINT; ALTER TABLE devices ADD COLUMN ip TEXT; ALTER TABLE devices ADD COLUMN user_agent TEXT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('devices_last_seen', '{}'); -- cgit 1.4.1 From 51d28272e20d799b2e35a8a14b3c1d9d5f555d10 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 16:00:18 +0100 Subject: Query devices table for last seen info. This is a) simpler than querying user_ips directly and b) means we can purge older entries from user_ips without losing the required info. The storage functions now no longer return the access_token, since it was unused. --- synapse/storage/client_ips.py | 57 ++++++---------------------------------- tests/storage/test_client_ips.py | 1 - 2 files changed, 8 insertions(+), 50 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 8839562269..a4e6d9dbe7 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -392,19 +392,14 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): keys giving the column names """ - res = yield self.runInteraction( - "get_last_client_ip_by_device", - self._get_last_client_ip_by_device_txn, - user_id, - device_id, - retcols=( - "user_id", - "access_token", - "ip", - "user_agent", - "device_id", - "last_seen", - ), + keyvalues = {"user_id": user_id} + if device_id: + keyvalues["device_id"] = device_id + + res = yield self._simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), ) ret = {(d["user_id"], d["device_id"]): d for d in res} @@ -423,42 +418,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } return ret - @classmethod - def _get_last_client_ip_by_device_txn(cls, txn, user_id, device_id, retcols): - where_clauses = [] - bindings = [] - if device_id is None: - where_clauses.append("user_id = ?") - bindings.extend((user_id,)) - else: - where_clauses.append("(user_id = ? AND device_id = ?)") - bindings.extend((user_id, device_id)) - - if not where_clauses: - return [] - - inner_select = ( - "SELECT MAX(last_seen) mls, user_id, device_id FROM user_ips " - "WHERE %(where)s " - "GROUP BY user_id, device_id" - ) % {"where": " OR ".join(where_clauses)} - - sql = ( - "SELECT %(retcols)s FROM user_ips " - "JOIN (%(inner_select)s) ips ON" - " user_ips.last_seen = ips.mls AND" - " user_ips.user_id = ips.user_id AND" - " (user_ips.device_id = ips.device_id OR" - " (user_ips.device_id IS NULL AND ips.device_id IS NULL)" - " )" - ) % { - "retcols": ",".join("user_ips." 
+ c for c in retcols), - "inner_select": inner_select, - } - - txn.execute(sql, bindings) - return cls.cursor_to_dict(txn) - @defer.inlineCallbacks def get_user_ip_and_agents(self, user): user_id = user.to_string() diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 09305c3bf1..6ac4654085 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -55,7 +55,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): { "user_id": user_id, "device_id": "device_id", - "access_token": "access_token", "ip": "ip", "user_agent": "user_agent", "last_seen": 12345678000, -- cgit 1.4.1 From 2858d10671f889796ca79712e29b8f35b8c9cd98 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 23 Sep 2019 17:22:01 +0100 Subject: Fix the return value in the users_set_deactivated_flag background job --- synapse/storage/registration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 805411a6b2..92dd42fb87 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -860,18 +860,18 @@ class RegistrationStore( ) if batch_size > len(rows): - return True + return (True, rows_processed_nb) else: - return False + return (True, rows_processed_nb) - end = yield self.runInteraction( + end, nb_processed = yield self.runInteraction( "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn ) if end: yield self._end_background_update("users_set_deactivated_flag") - return batch_size + return nb_processed @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms): -- cgit 1.4.1 From 323d685bf743a660339be574aba68c0a63cb6483 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 23 Sep 2019 17:23:49 +0100 Subject: Typo --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 92dd42fb87..4c84d804fe 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -862,7 +862,7 @@ class RegistrationStore( if batch_size > len(rows): return (True, rows_processed_nb) else: - return (True, rows_processed_nb) + return (False, rows_processed_nb) end, nb_processed = yield self.runInteraction( "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn -- cgit 1.4.1 From 2c99c634532a62fa3479c1f90929b3eabe7880bc Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 18:49:00 +0200 Subject: Add POST submit_token endpoint for MSISDN (#6078) First part of solving #6076 --- changelog.d/6078.feature | 1 + synapse/handlers/identity.py | 34 ++++++++++++++++++++++++ synapse/rest/client/v2_alpha/account.py | 47 +++++++++++++++++++++++++++++++-- 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6078.feature (limited to 'synapse') diff --git a/changelog.d/6078.feature b/changelog.d/6078.feature new file mode 100644 index 0000000000..fae1e52322 --- /dev/null +++ b/changelog.d/6078.feature @@ -0,0 +1 @@ +Add `POST /add_threepid/msisdn/submit_token` endpoint for proxying submitToken on an account_threepid_handler. 
\ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index d50d485e06..af6f591942 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -491,6 +491,40 @@ class IdentityHandler(BaseHandler): return validation_session + @defer.inlineCallbacks + def proxy_msisdn_submit_token(self, id_server, client_secret, sid, token): + """Proxy a POST submitToken request to an identity server for verification purposes + + Args: + id_server (str): The identity server URL to contact + + client_secret (str): Secret provided by the client + + sid (str): The ID of the session + + token (str): The verification token + + Raises: + SynapseError: If we failed to contact the identity server + + Returns: + Deferred[dict]: The response dict from the identity server + """ + body = {"client_secret": client_secret, "sid": sid, "token": token} + + try: + return ( + yield self.http_client.post_json_get_json( + id_server + "/_matrix/identity/api/v1/validate/msisdn/submitToken", + body, + ) + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + logger.warning("Error contacting msisdn account_threepid_delegate: %s", e) + raise SynapseError(400, "Error contacting the identity server") + def create_id_access_token_header(id_access_token): """Create an Authorization header for passing to SimpleHttpClient as the header value diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index b8c48dc8f1..f99676fd30 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -524,7 +524,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): return 200, ret -class AddThreepidSubmitTokenServlet(RestServlet): +class AddThreepidEmailSubmitTokenServlet(RestServlet): """Handles 3PID validation token submission for adding an email to a user's account""" PATTERNS = client_patterns( @@ -600,6 +600,48 @@ class AddThreepidSubmitTokenServlet(RestServlet): finish_request(request) +class AddThreepidMsisdnSubmitTokenServlet(RestServlet): + """Handles 3PID validation token submission for adding a phone number to a user's + account + """ + + PATTERNS = client_patterns( + "/add_threepid/msisdn/submit_token$", releases=(), unstable=True + ) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super().__init__() + self.config = hs.config + self.clock = hs.get_clock() + self.store = hs.get_datastore() + self.identity_handler = hs.get_handlers().identity_handler + + @defer.inlineCallbacks + def on_POST(self, request): + if not self.config.account_threepid_delegate_msisdn: + raise SynapseError( + 400, + "This homeserver is not validating phone numbers. 
Use an identity server " + "instead.", + ) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ["client_secret", "sid", "token"]) + + # Proxy submit_token request to msisdn threepid delegate + response = yield self.identity_handler.proxy_msisdn_submit_token( + self.config.account_threepid_delegate_msisdn, + body["client_secret"], + body["sid"], + body["token"], + ) + return 200, response + + class ThreepidRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid$") @@ -807,7 +849,8 @@ def register_servlets(hs, http_server): DeactivateAccountRestServlet(hs).register(http_server) EmailThreepidRequestTokenRestServlet(hs).register(http_server) MsisdnThreepidRequestTokenRestServlet(hs).register(http_server) - AddThreepidSubmitTokenServlet(hs).register(http_server) + AddThreepidEmailSubmitTokenServlet(hs).register(http_server) + AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server) ThreepidRestServlet(hs).register(http_server) ThreepidAddRestServlet(hs).register(http_server) ThreepidBindRestServlet(hs).register(http_server) -- cgit 1.4.1 From b38aa82b83334573e40cb56f076eaf820c51c9ba Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 19:52:43 +0200 Subject: Add m.require_identity_server to /versions unstable_flags (#5972) As MSC2263 states, m.require_identity_server must be set to false when it does not require an identity server to be provided by the client for the purposes of email registration or password reset. Adds an m.require_identity_server flag to /versionss unstable_flags section. This will advertise that Synapse no longer needs id_server as a parameter. --- changelog.d/5972.misc | 1 + synapse/rest/client/versions.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5972.misc (limited to 'synapse') diff --git a/changelog.d/5972.misc b/changelog.d/5972.misc new file mode 100644 index 0000000000..1dc217e899 --- /dev/null +++ b/changelog.d/5972.misc @@ -0,0 +1 @@ +Add m.require_identity_server flag to /version's unstable_features. \ No newline at end of file diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 0058b6b459..3c9ec59d72 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -48,7 +48,19 @@ class VersionsRestServlet(RestServlet): "r0.5.0", ], # as per MSC1497: - "unstable_features": {"m.lazy_load_members": True}, + "unstable_features": { + "m.lazy_load_members": True, + # Advertise to clients that they need not include an `id_server` + # parameter during registration or password reset, as Synapse now decides + # itself which identity server to use (or none at all). + # + # This is also used by a client when they wish to bind a 3PID to their + # account, but not bind it to an identity server, the endpoint for which + # also requires `id_server`. If the homeserver is handling 3PID + # verification itself, there is no need to ask the user for `id_server` to + # be supplied. + "m.require_identity_server": False, + }, }, ) -- cgit 1.4.1 From 1ea3ed76201de678c8c19c568bb3456ae4989a97 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 21:19:25 +0200 Subject: Add m.id_access_token to /versions unstable_features (MSC2264) (#5974) Adds a flag to /versions' unstable_features section indicating that this Synapse understands what an id_access_token is, as per MSC2264. 
Fixes #5927 --- changelog.d/5974.feature | 1 + synapse/rest/client/versions.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/5974.feature (limited to 'synapse') diff --git a/changelog.d/5974.feature b/changelog.d/5974.feature new file mode 100644 index 0000000000..387a444fc4 --- /dev/null +++ b/changelog.d/5974.feature @@ -0,0 +1 @@ +Add m.id_access_token to unstable_features in /versions as per MSC2264. \ No newline at end of file diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 3c9ec59d72..fdab0ddb42 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -50,6 +50,9 @@ class VersionsRestServlet(RestServlet): # as per MSC1497: "unstable_features": { "m.lazy_load_members": True, + # as per MSC2190, as amended by MSC2264 + # to be removed in r0.6.0 + "m.id_access_token": True, # Advertise to clients that they need not include an `id_server` # parameter during registration or password reset, as Synapse now decides # itself which identity server to use (or none at all). -- cgit 1.4.1 From e08ea43463bacd5efacbf6c790c6be0f3cd06ce6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 21:23:20 +0200 Subject: Use the federation blacklist for requests to untrusted Identity Servers (#6000) Uses a SimpleHttpClient instance equipped with the federation_ip_range_blacklist list for requests to identity servers provided by user input. Does not use a blacklist when contacting identity servers specified by account_threepid_delegates. The homeserver trusts the latter and we don't want to prevent homeserver admins from specifying delegates that are on internal IP addresses. Fixes #5935 --- changelog.d/6000.feature | 1 + docs/sample_config.yaml | 3 +++ synapse/config/server.py | 3 +++ synapse/handlers/identity.py | 18 +++++++++++++++--- synapse/handlers/room_member.py | 7 ++++++- 5 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6000.feature (limited to 'synapse') diff --git a/changelog.d/6000.feature b/changelog.d/6000.feature new file mode 100644 index 0000000000..0a159bd10d --- /dev/null +++ b/changelog.d/6000.feature @@ -0,0 +1 @@ +Apply the federation blacklist to requests to identity servers. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 61d9f09a99..e53b979c35 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -110,6 +110,9 @@ pid_file: DATADIR/homeserver.pid # blacklist IP address CIDR ranges. If this option is not specified, or # specified with an empty list, no ip range blacklist will be enforced. # +# As of Synapse v1.4.0 this option also affects any outbound requests to identity +# servers provided by user input. +# # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly # listed here, since they correspond to unroutable addresses.) # diff --git a/synapse/config/server.py b/synapse/config/server.py index 7f8d315954..419787a89c 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -545,6 +545,9 @@ class ServerConfig(Config): # blacklist IP address CIDR ranges. If this option is not specified, or # specified with an empty list, no ip range blacklist will be enforced. # + # As of Synapse v1.4.0 this option also affects any outbound requests to identity + # servers provided by user input. 
+ # # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly # listed here, since they correspond to unroutable addresses.) # diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index af6f591942..264bdc2189 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -31,6 +31,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.config.emailconfig import ThreepidBehaviour +from synapse.http.client import SimpleHttpClient from synapse.util.stringutils import random_string from ._base import BaseHandler @@ -42,7 +43,12 @@ class IdentityHandler(BaseHandler): def __init__(self, hs): super(IdentityHandler, self).__init__(hs) - self.http_client = hs.get_simple_http_client() + self.http_client = SimpleHttpClient(hs) + # We create a blacklisting instance of SimpleHttpClient for contacting identity + # servers specified by clients + self.blacklisting_http_client = SimpleHttpClient( + hs, ip_blacklist=hs.config.federation_ip_range_blacklist + ) self.federation_http_client = hs.get_http_client() self.hs = hs @@ -143,7 +149,9 @@ class IdentityHandler(BaseHandler): bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) try: - data = yield self.http_client.post_json_get_json( + # Use the blacklisting http client as this call is only to identity servers + # provided by a client + data = yield self.blacklisting_http_client.post_json_get_json( bind_url, bind_data, headers=headers ) @@ -246,7 +254,11 @@ class IdentityHandler(BaseHandler): headers = {b"Authorization": auth_headers} try: - yield self.http_client.post_json_get_json(url, content, headers) + # Use the blacklisting http client as this call is only to identity servers + # provided by a client + yield self.blacklisting_http_client.post_json_get_json( + url, content, headers + ) changed = True except HttpResponseException as e: changed = False diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 39df0f128d..94cd0cf3ef 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -31,6 +31,7 @@ from synapse import types from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header +from synapse.http.client import SimpleHttpClient from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room @@ -62,7 +63,11 @@ class RoomMemberHandler(object): self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - self.simple_http_client = hs.get_simple_http_client() + # We create a blacklisting instance of SimpleHttpClient for contacting identity + # servers specified by clients + self.simple_http_client = SimpleHttpClient( + hs, ip_blacklist=hs.config.federation_ip_range_blacklist + ) self.federation_handler = hs.get_handlers().federation_handler self.directory_handler = hs.get_handlers().directory_handler -- cgit 1.4.1 From 2b071a2ff1ce59c5b7a4930c471470c739c5efe2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 21:46:34 +0200 Subject: Add an unstable feature flag for separate add/bind 3pid APIs (#6044) Add a m.separate_add_and_bind flag set to True. See MSC2290's Backward Compatibility section for details. 
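(Illustrative aside, not part of the patch: a minimal sketch of how a client might feature-detect this flag before choosing between the combined and the separate add/bind 3PID flows. The homeserver URL and the Python `requests` library are assumptions; the endpoint and flag name follow the diff below.)

# Hypothetical feature-detection sketch (not part of the patch).
import requests

versions = requests.get("https://matrix.example.org/_matrix/client/versions").json()
if versions.get("unstable_features", {}).get("m.separate_add_and_bind", False):
    # Use the new /account/3pid/add and /account/3pid/bind unstable endpoints.
    ...
else:
    # Fall back to the older combined /account/3pid flow.
    ...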
--- changelog.d/6044.feature | 1 + synapse/rest/client/versions.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/6044.feature (limited to 'synapse') diff --git a/changelog.d/6044.feature b/changelog.d/6044.feature new file mode 100644 index 0000000000..7dc05d4845 --- /dev/null +++ b/changelog.d/6044.feature @@ -0,0 +1 @@ +Add an unstable feature flag for separate add/bind 3pid APIs. \ No newline at end of file diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index fdab0ddb42..1044ae7b4e 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -63,6 +63,8 @@ class VersionsRestServlet(RestServlet): # verification itself, there is no need to ask the user for `id_server` to # be supplied. "m.require_identity_server": False, + # as per MSC2290 + "m.separate_add_and_bind": True, }, }, ) -- cgit 1.4.1 From 50776261e1565afe45a1cfd4a991c24110c2e519 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Sep 2019 22:21:03 +0200 Subject: Add submit_url response parameter to msisdn /requestToken (#6079) Second part of solving #6076 Fixes #6076 We return a submit_url parameter on calls to POST */msisdn/requestToken so that clients know where to submit token information to. --- changelog.d/6079.feature | 1 + docs/sample_config.yaml | 2 ++ synapse/config/registration.py | 2 ++ synapse/handlers/identity.py | 12 +++++++++++- 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6079.feature (limited to 'synapse') diff --git a/changelog.d/6079.feature b/changelog.d/6079.feature new file mode 100644 index 0000000000..bcbb49ac58 --- /dev/null +++ b/changelog.d/6079.feature @@ -0,0 +1 @@ +Add `submit_url` response parameter to `*/msisdn/requestToken` endpoints. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index bd208b17dd..46af6edf1f 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -940,6 +940,8 @@ uploads_path: "DATADIR/uploads" # by the Matrix Identity Service API specification: # https://matrix.org/docs/spec/identity_service/latest # +# If a delegate is specified, the config option public_baseurl must also be filled out. +# account_threepid_delegates: #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process diff --git a/synapse/config/registration.py b/synapse/config/registration.py index d4654e99b3..bef89e2bf4 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -293,6 +293,8 @@ class RegistrationConfig(Config): # by the Matrix Identity Service API specification: # https://matrix.org/docs/spec/identity_service/latest # + # If a delegate is specified, the config option public_baseurl must also be filled out. 
+ # account_threepid_delegates: #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 264bdc2189..1f16afd14e 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -452,13 +452,23 @@ class IdentityHandler(BaseHandler): id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken", params, ) - return data except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") + assert self.hs.config.public_baseurl + + # we need to tell the client to send the token back to us, since it doesn't + # otherwise know where to send it, so add submit_url response parameter + # (see also MSC2078) + data["submit_url"] = ( + self.hs.config.public_baseurl + + "_matrix/client/unstable/add_threepid/msisdn/submit_token" + ) + return data + @defer.inlineCallbacks def validate_threepid_session(self, client_secret, sid): """Validates a threepid session with only the client secret and session ID -- cgit 1.4.1 From a25b66d3f9a02b2cba5430339e9bf45f335becbb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 24 Sep 2019 11:15:08 +0100 Subject: docstrings and comments --- synapse/config/saml2_config.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index 14539fdb2a..be9f04d780 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -20,20 +20,32 @@ from synapse.util.module_loader import load_python_module from ._base import Config, ConfigError -def _dict_merge(merge_dict, into_dct): +def _dict_merge(merge_dict, into_dict): + """Do a deep merge of two dicts + + Recursively merges `merge_dict` into `into_dict`: + * For keys where both `merge_dict` and `into_dict` have a dict value, the values + are recursively merged + * For all other keys, the values in `into_dict` (if any) are overwritten with + the value from `merge_dict`. 
+ + Args: + merge_dict (dict): dict to merge + into_dict (dict): target dict + """ for k, v in merge_dict.items(): - if k not in into_dct: - into_dct[k] = v + if k not in into_dict: + into_dict[k] = v continue - current_val = into_dct[k] + current_val = into_dict[k] if isinstance(v, dict) and isinstance(current_val, dict): _dict_merge(v, current_val) continue # otherwise we just overwrite - into_dct[k] = v + into_dict[k] = v class SAML2Config(Config): @@ -53,12 +65,14 @@ class SAML2Config(Config): self.saml2_enabled = True saml2_config_dict = self._default_saml_config_dict() - _dict_merge(saml2_config.get("sp_config", {}), saml2_config_dict) + _dict_merge( + merge_dict=saml2_config.get("sp_config", {}), into_dict=saml2_config_dict + ) config_path = saml2_config.get("config_path", None) if config_path is not None: mod = load_python_module(config_path) - _dict_merge(mod.CONFIG, saml2_config_dict) + _dict_merge(merge_dict=mod.CONFIG, into_dict=saml2_config_dict) import saml2.config -- cgit 1.4.1 From f02f14e09a9f4ce703fa1091ad40c64957811736 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 24 Sep 2019 14:39:07 +0100 Subject: Fix logging --- synapse/storage/background_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index e5f0668f09..9522acd972 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -218,7 +218,7 @@ class BackgroundUpdateStore(SQLBaseStore): duration_ms = time_stop - time_start logger.info( - "Updating %r. Updated %r items in %rms." + "Running background update %r. Processed %r items in %rms." " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)", update_name, items_updated, -- cgit 1.4.1 From 40fb00f5b7a8a9df15169900df218df19423b93e Mon Sep 17 00:00:00 2001 From: "J. Ryan Stinnett" Date: Tue, 24 Sep 2019 14:39:50 +0100 Subject: Add sid to next_link for email validation (#6097) --- changelog.d/6097.bugfix | 1 + synapse/handlers/identity.py | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 changelog.d/6097.bugfix (limited to 'synapse') diff --git a/changelog.d/6097.bugfix b/changelog.d/6097.bugfix new file mode 100644 index 0000000000..750a8ecf0a --- /dev/null +++ b/changelog.d/6097.bugfix @@ -0,0 +1 @@ +Add sid to next_link for email validation. diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 1f16afd14e..6d42a1aed8 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -18,6 +18,7 @@ """Utilities for interacting with Identity Servers""" import logging +import urllib from canonicaljson import json @@ -328,6 +329,15 @@ class IdentityHandler(BaseHandler): # Generate a session id session_id = random_string(16) + if next_link: + # Manipulate the next_link to add the sid, because the caller won't get + # it until we send a response, by which time we've sent the mail. + if "?" in next_link: + next_link += "&" + else: + next_link += "?" 
+ next_link += "sid=" + urllib.parse.quote(session_id) + # Generate a new validation token token = random_string(32) -- cgit 1.4.1 From 12fe2a29bc4b15f667022c4ceb5428405ca3da9d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 24 Sep 2019 14:43:38 +0100 Subject: Incorporate review --- synapse/storage/registration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 4c84d804fe..c3acc8ecaf 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -844,7 +844,7 @@ class RegistrationStore( rows = self.cursor_to_dict(txn) if not rows: - return True + return True, 0 rows_processed_nb = 0 @@ -860,9 +860,9 @@ class RegistrationStore( ) if batch_size > len(rows): - return (True, rows_processed_nb) + return True, len(rows) else: - return (False, rows_processed_nb) + return False, len(rows) end, nb_processed = yield self.runInteraction( "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn -- cgit 1.4.1 From 367158a609d18b6dbd143f8bee0529e743d5b5a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 14:16:16 +0100 Subject: Add wrap_as_background_process decorator. This does the same thing as `run_as_background_process` but means we don't need to create superfluous functions. --- synapse/metrics/background_process_metrics.py | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index edd6b42db3..b24e2fab4a 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -15,6 +15,8 @@ import logging import threading +from asyncio import iscoroutine +from functools import wraps import six @@ -197,7 +199,15 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes.setdefault(desc, set()).add(proc) try: - yield func(*args, **kwargs) + # We ensureDeferred here to handle coroutines + result = func(*args, **kwargs) + + # We need this check because ensureDeferred doesn't like when + # func doesn't return a Deferred or coroutine. + if iscoroutine(result): + result = defer.ensureDeferred(result) + + return (yield result) except Exception: logger.exception("Background process '%s' threw an exception", desc) finally: @@ -208,3 +218,20 @@ def run_as_background_process(desc, func, *args, **kwargs): with PreserveLoggingContext(): return run() + + +def wrap_as_background_process(desc): + """Decorator that wraps a function that gets called as a background + process. + + Equivalent of calling the function with `run_as_background_process` + """ + + def wrap_as_background_process_inner(func): + @wraps(func) + def wrap_as_background_process_inner_2(*args, **kwargs): + return run_as_background_process(desc, func, *args, **kwargs) + + return wrap_as_background_process_inner_2 + + return wrap_as_background_process_inner -- cgit 1.4.1 From 2135c198d17b41297511a2fc3b39551d160069b2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 14:18:31 +0100 Subject: Add has_completed_background_update This allows checking if a specific background update has completed. 
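Note: this helper is designed to pair with the wrap_as_background_process decorator from the previous commit, and the user_ips pruning commit later in this series uses exactly that combination. A rough sketch of the intended pattern follows; the store class, job name, table and SQL are placeholders rather than real Synapse code, although "devices_last_seen" is the real update name used further down:

from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage.background_updates import BackgroundUpdateStore

class ExampleStore(BackgroundUpdateStore):
    # Hypothetical store, for illustration only.

    @wrap_as_background_process("example_prune")
    async def _prune_example_rows(self):
        # Stay a no-op until the relevant background update has finished.
        if not await self.has_completed_background_update("devices_last_seen"):
            return

        def _prune_txn(txn):
            # Placeholder SQL; the real pruning query is in the user_ips commit below.
            txn.execute("DELETE FROM example_table WHERE last_seen <= ?", (0,))

        await self.runInteraction("prune_example_rows", _prune_txn)
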
--- synapse/storage/background_updates.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index e5f0668f09..3fc25cd828 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -140,7 +140,7 @@ class BackgroundUpdateStore(SQLBaseStore): "background_updates", keyvalues=None, retcol="1", - desc="check_background_updates", + desc="has_completed_background_updates", ) if not updates: self._all_done = True @@ -148,6 +148,29 @@ class BackgroundUpdateStore(SQLBaseStore): return False + async def has_completed_background_update(self, update_name): + """Check if the given background update has finished running. + + Returns: + Deferred[bool] + """ + + if self._all_done: + return True + + if update_name in self._background_update_queue: + return False + + update_exists = await self._simple_select_one_onecol( + "background_updates", + keyvalues={"update_name": update_name}, + retcol="1", + desc="has_completed_background_update", + allow_none=True, + ) + + return not update_exists + @defer.inlineCallbacks def do_next_background_update(self, desired_duration_ms): """Does some amount of work on the next queued background update -- cgit 1.4.1 From 242017db8b7b57be28a019ecbba1619d75d54889 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 15:20:40 +0100 Subject: Prune rows in user_ips older than configured period Defaults to pruning everything older than 28d. --- docs/sample_config.yaml | 6 +++++ synapse/config/server.py | 13 +++++++++ synapse/storage/client_ips.py | 62 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 73 insertions(+), 8 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 61d9f09a99..cc6035c838 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -313,6 +313,12 @@ listeners: # redaction_retention_period: 7d +# How long to track users' last seen time and IPs in the database. +# +# Defaults to `28d`. Set to `null` to disable. +# +#user_ips_max_age: 14d + ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index 7f8d315954..655e7487a4 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -172,6 +172,13 @@ class ServerConfig(Config): else: self.redaction_retention_period = None + # How long to keep entries in the `users_ips` table. + user_ips_max_age = config.get("user_ips_max_age", "28d") + if user_ips_max_age is not None: + self.user_ips_max_age = self.parse_duration(user_ips_max_age) + else: + self.user_ips_max_age = None + # Options to disable HS self.hs_disabled = config.get("hs_disabled", False) self.hs_disabled_message = config.get("hs_disabled_message", "") @@ -735,6 +742,12 @@ class ServerConfig(Config): # Defaults to `7d`. Set to `null` to disable. # redaction_retention_period: 7d + + # How long to track users' last seen time and IPs in the database. + # + # Defaults to `28d`. Set to `null` to disable. 
+ # + #user_ips_max_age: 14d """ % locals() ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a4e6d9dbe7..176c812b1f 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -19,7 +19,7 @@ from six import iteritems from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.util.caches import CACHE_SIZE_FACTOR from . import background_updates @@ -42,6 +42,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): super(ClientIpStore, self).__init__(db_conn, hs) + self.user_ips_max_age = hs.config.user_ips_max_age + self.register_background_index_update( "user_ips_device_index", index_name="user_ips_device_id", @@ -100,6 +102,9 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "before", "shutdown", self._update_client_ips_batch ) + if self.user_ips_max_age: + self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) + @defer.inlineCallbacks def _remove_user_ip_nonunique(self, progress, batch_size): def f(conn): @@ -319,20 +324,19 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) + @wrap_as_background_process("update_client_ips") def _update_client_ips_batch(self): # If the DB pool has already terminated, don't try updating if not self.hs.get_db_pool().running: return - def update(): - to_update = self._batch_row_update - self._batch_row_update = {} - return self.runInteraction( - "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update - ) + to_update = self._batch_row_update + self._batch_row_update = {} - return run_as_background_process("update_client_ips", update) + return self.runInteraction( + "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update + ) def _update_client_ips_batch_txn(self, txn, to_update): if "user_ips" in self._unsafe_to_upsert_tables or ( @@ -496,3 +500,45 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): yield self._end_background_update("devices_last_seen") return updated + + @wrap_as_background_process("prune_old_user_ips") + async def _prune_old_user_ips(self): + """Removes entries in user IPs older than the configured period. + """ + + if not self.user_ips_max_age: + # Nothing to do + return + + if not await self.has_completed_background_update("devices_last_seen"): + # Only start pruning if we have finished populating the devices + # last seen info. + return + + # We do a slightly funky SQL delete to ensure we don't try and delete + # too much at once (as the table may be very large from before we + # started pruning). + # + # This works by finding the max last_seen that is less than the given + # time, but has no more than N rows before it, deleting all rows with + # a lesser last_seen time. (We COALESCE so that the sub-SELECT always + # returns exactly one row). + sql = """ + DELETE FROM user_ips + WHERE last_seen <= ( + SELECT COALESCE(MAX(last_seen), -1) + FROM ( + SELECT last_seen FROM user_ips + WHERE last_seen <= ? 
+ ORDER BY last_seen ASC + LIMIT 5000 + ) AS u + ) + """ + + timestamp = self.clock.time_msec() - self.user_ips_max_age + + def _prune_old_user_ips_txn(txn): + txn.execute(sql, (timestamp,)) + + await self.runInteraction("_prune_old_user_ips", _prune_old_user_ips_txn) -- cgit 1.4.1 From 566ac40939404649d58c053e97dba75810f95339 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 24 Sep 2019 17:01:09 +0100 Subject: remove unused parameter to get_user_id_by_threepid (#6099) Added in #5377, apparently in error --- changelog.d/6099.misc | 1 + synapse/storage/registration.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6099.misc (limited to 'synapse') diff --git a/changelog.d/6099.misc b/changelog.d/6099.misc new file mode 100644 index 0000000000..8415c6759b --- /dev/null +++ b/changelog.d/6099.misc @@ -0,0 +1 @@ +Remove unused parameter to get_user_id_by_threepid. diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 805411a6b2..5cf2c893aa 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -495,7 +495,7 @@ class RegistrationWorkerStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_user_id_by_threepid(self, medium, address, require_verified=False): + def get_user_id_by_threepid(self, medium, address): """Returns user id from threepid Args: -- cgit 1.4.1 From 8004d6ca2faf0f2f843fcdcaf225d7bcab847503 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 25 Sep 2019 11:32:05 +0100 Subject: Refactor code for calculating registration flows (#6106) because, frankly, it looked like it was written by an axe-murderer. This should be a non-functional change, except that where `m.login.dummy` was previously advertised *before* `m.login.terms`, it will now be advertised afterwards. AFAICT that should have no effect, and will be more consistent with the flows that involve passing a 3pid. --- changelog.d/6106.misc | 1 + synapse/rest/client/v2_alpha/register.py | 124 ++++++++++++++-------------- tests/rest/client/v2_alpha/test_register.py | 79 +++++++++++++++--- tests/test_terms_auth.py | 24 ++++-- 4 files changed, 145 insertions(+), 83 deletions(-) create mode 100644 changelog.d/6106.misc (limited to 'synapse') diff --git a/changelog.d/6106.misc b/changelog.d/6106.misc new file mode 100644 index 0000000000..d732091779 --- /dev/null +++ b/changelog.d/6106.misc @@ -0,0 +1 @@ +Refactor code for calculating registration flows. 
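Note: to make the behaviour change concrete, these are the flow lists the extracted helper produces for two configurations, copied from the updated tests later in this patch (note that m.login.terms now sits before m.login.dummy, as flagged in the commit message):

# Stock test config (no captcha, no consent at registration):
flows_stock_config = [
    ["m.login.dummy"],
    ["m.login.email.identity"],
    ["m.login.msisdn"],
    ["m.login.msisdn", "m.login.email.identity"],
]

# With enable_registration_captcha and user_consent.require_at_registration:
flows_with_captcha_and_terms = [
    ["m.login.recaptcha", "m.login.terms", "m.login.dummy"],
    ["m.login.recaptcha", "m.login.terms", "m.login.email.identity"],
    ["m.login.recaptcha", "m.login.terms", "m.login.msisdn"],
    ["m.login.recaptcha", "m.login.terms", "m.login.msisdn", "m.login.email.identity"],
]
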
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 135a70808f..e3f3d9126f 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -16,6 +16,7 @@ import hmac import logging +from typing import List, Union from six import string_types @@ -31,8 +32,11 @@ from synapse.api.errors import ( ThreepidValidationError, UnrecognizedRequestError, ) +from synapse.config.captcha import CaptchaConfig +from synapse.config.consent_config import ConsentConfig from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.ratelimiting import FederationRateLimitConfig +from synapse.config.registration import RegistrationConfig from synapse.config.server import is_threepid_reserved from synapse.http.server import finish_request from synapse.http.servlet import ( @@ -371,6 +375,8 @@ class RegisterRestServlet(RestServlet): self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() + self._registration_flows = _calculate_registration_flows(hs.config) + @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): @@ -491,69 +497,8 @@ class RegisterRestServlet(RestServlet): assigned_user_id=registered_user_id, ) - # FIXME: need a better error than "no auth flow found" for scenarios - # where we required 3PID for registration but the user didn't give one - require_email = "email" in self.hs.config.registrations_require_3pid - require_msisdn = "msisdn" in self.hs.config.registrations_require_3pid - - show_msisdn = True - if self.hs.config.disable_msisdn_registration: - show_msisdn = False - require_msisdn = False - - flows = [] - if self.hs.config.enable_registration_captcha: - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - # Also add a dummy flow here, otherwise if a client completes - # recaptcha first we'll assume they were going for this flow - # and complete the request, when they could have been trying to - # complete one of the flows with email/msisdn auth. 
- flows.extend([[LoginType.RECAPTCHA, LoginType.DUMMY]]) - # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: - flows.extend([[LoginType.RECAPTCHA, LoginType.EMAIL_IDENTITY]]) - - if show_msisdn: - # only support the MSISDN-only flow if we don't require email 3PIDs - if not require_email: - flows.extend([[LoginType.RECAPTCHA, LoginType.MSISDN]]) - # always let users provide both MSISDN & email - flows.extend( - [[LoginType.RECAPTCHA, LoginType.MSISDN, LoginType.EMAIL_IDENTITY]] - ) - else: - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - flows.extend([[LoginType.DUMMY]]) - # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: - flows.extend([[LoginType.EMAIL_IDENTITY]]) - - if show_msisdn: - # only support the MSISDN-only flow if we don't require email 3PIDs - if not require_email or require_msisdn: - flows.extend([[LoginType.MSISDN]]) - # always let users provide both MSISDN & email - flows.extend([[LoginType.MSISDN, LoginType.EMAIL_IDENTITY]]) - - # Append m.login.terms to all flows if we're requiring consent - if self.hs.config.user_consent_at_registration: - new_flows = [] - for flow in flows: - inserted = False - # m.login.terms should go near the end but before msisdn or email auth - for i, stage in enumerate(flow): - if stage == LoginType.EMAIL_IDENTITY or stage == LoginType.MSISDN: - flow.insert(i, LoginType.TERMS) - inserted = True - break - if not inserted: - flow.append(LoginType.TERMS) - flows.extend(new_flows) - auth_result, params, session_id = yield self.auth_handler.check_auth( - flows, body, self.hs.get_ip_from_request(request) + self._registration_flows, body, self.hs.get_ip_from_request(request) ) # Check that we're not trying to register a denied 3pid. @@ -716,6 +661,61 @@ class RegisterRestServlet(RestServlet): ) +def _calculate_registration_flows( + # technically `config` has to provide *all* of these interfaces, not just one + config: Union[RegistrationConfig, ConsentConfig, CaptchaConfig], +) -> List[List[str]]: + """Get a suitable flows list for registration + + Args: + config: server configuration + + Returns: a list of supported flows + """ + # FIXME: need a better error than "no auth flow found" for scenarios + # where we required 3PID for registration but the user didn't give one + require_email = "email" in config.registrations_require_3pid + require_msisdn = "msisdn" in config.registrations_require_3pid + + show_msisdn = True + if config.disable_msisdn_registration: + show_msisdn = False + require_msisdn = False + + flows = [] + + # only support 3PIDless registration if no 3PIDs are required + if not require_email and not require_msisdn: + # Add a dummy step here, otherwise if a client completes + # recaptcha first we'll assume they were going for this flow + # and complete the request, when they could have been trying to + # complete one of the flows with email/msisdn auth. 
+ flows.append([LoginType.DUMMY]) + + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: + flows.append([LoginType.EMAIL_IDENTITY]) + + # only support the MSISDN-only flow if we don't require email 3PIDs + if show_msisdn and not require_email: + flows.append([LoginType.MSISDN]) + + if show_msisdn: + flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY]) + + # Prepend m.login.terms to all flows if we're requiring consent + if config.user_consent_at_registration: + for flow in flows: + flow.insert(0, LoginType.TERMS) + + # Prepend recaptcha to all flows if we're requiring captcha + if config.enable_registration_captcha: + for flow in flows: + flow.insert(0, LoginType.RECAPTCHA) + + return flows + + def register_servlets(hs, http_server): EmailRegisterRequestTokenRestServlet(hs).register(http_server) MsisdnRegisterRequestTokenRestServlet(hs).register(http_server) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index ab4d7d70d0..bc2dc47973 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -34,19 +34,12 @@ from tests import unittest class RegisterRestServletTestCase(unittest.HomeserverTestCase): servlets = [register.register_servlets] + url = b"/_matrix/client/r0/register" - def make_homeserver(self, reactor, clock): - - self.url = b"/_matrix/client/r0/register" - - self.hs = self.setup_test_homeserver() - self.hs.config.enable_registration = True - self.hs.config.registrations_require_3pid = [] - self.hs.config.auto_join_rooms = [] - self.hs.config.enable_registration_captcha = False - self.hs.config.allow_guest_access = True - - return self.hs + def default_config(self, name="test"): + config = super().default_config(name) + config["allow_guest_access"] = True + return config def test_POST_appservice_registration_valid(self): user_id = "@as_user_kermit:test" @@ -199,6 +192,68 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) + def test_advertised_flows(self): + request, channel = self.make_request(b"POST", self.url, b"{}") + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + flows = channel.json_body["flows"] + + # with the stock config, we expect all four combinations of 3pid + self.assertCountEqual( + [ + ["m.login.dummy"], + ["m.login.email.identity"], + ["m.login.msisdn"], + ["m.login.msisdn", "m.login.email.identity"], + ], + (f["stages"] for f in flows), + ) + + @unittest.override_config( + { + "enable_registration_captcha": True, + "user_consent": { + "version": "1", + "template_dir": "/", + "require_at_registration": True, + }, + } + ) + def test_advertised_flows_captcha_and_terms(self): + request, channel = self.make_request(b"POST", self.url, b"{}") + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + flows = channel.json_body["flows"] + + self.assertCountEqual( + [ + ["m.login.recaptcha", "m.login.terms", "m.login.dummy"], + ["m.login.recaptcha", "m.login.terms", "m.login.email.identity"], + ["m.login.recaptcha", "m.login.terms", "m.login.msisdn"], + [ + "m.login.recaptcha", + "m.login.terms", + "m.login.msisdn", + "m.login.email.identity", + ], + ], + (f["stages"] for f in flows), + ) + + @unittest.override_config( + {"registrations_require_3pid": ["email"], "disable_msisdn_registration": True} + ) + def test_advertised_flows_no_msisdn_email_required(self): + 
request, channel = self.make_request(b"POST", self.url, b"{}") + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + flows = channel.json_body["flows"] + + # with the stock config, we expect all four combinations of 3pid + self.assertCountEqual( + [["m.login.email.identity"]], (f["stages"] for f in flows) + ) + class AccountValidityTestCase(unittest.HomeserverTestCase): diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py index 52739fbabc..5ec5d2b358 100644 --- a/tests/test_terms_auth.py +++ b/tests/test_terms_auth.py @@ -28,6 +28,21 @@ from tests import unittest class TermsTestCase(unittest.HomeserverTestCase): servlets = [register_servlets] + def default_config(self, name="test"): + config = super().default_config(name) + config.update( + { + "public_baseurl": "https://example.org/", + "user_consent": { + "version": "1.0", + "policy_name": "My Cool Privacy Policy", + "template_dir": "/", + "require_at_registration": True, + }, + } + ) + return config + def prepare(self, reactor, clock, hs): self.clock = MemoryReactorClock() self.hs_clock = Clock(self.clock) @@ -35,17 +50,8 @@ class TermsTestCase(unittest.HomeserverTestCase): self.registration_handler = Mock() self.auth_handler = Mock() self.device_handler = Mock() - hs.config.enable_registration = True - hs.config.registrations_require_3pid = [] - hs.config.auto_join_rooms = [] - hs.config.enable_registration_captcha = False def test_ui_auth(self): - self.hs.config.user_consent_at_registration = True - self.hs.config.user_consent_policy_name = "My Cool Privacy Policy" - self.hs.config.public_baseurl = "https://example.org/" - self.hs.config.user_consent_version = "1.0" - # Do a UI auth request request, channel = self.make_request(b"POST", self.url, b"{}") self.render(request) -- cgit 1.4.1 From fde4ce22135b06d05b646141f90cdf3038ed4fe2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:32:41 +0100 Subject: Don't create new span for get_user_by_req We don't actually care about what happens in `get_user_by_req` and having it as a separate span means that the entity tag isn't added to the servlet spans, making it harder to search. --- synapse/api/auth.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 9e445cd808..59852bdbdb 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -179,7 +179,6 @@ class Auth(object): def get_public_keys(self, invite_event): return event_auth.get_public_keys(invite_event) - @opentracing.trace @defer.inlineCallbacks def get_user_by_req( self, request, allow_guest=False, rights="access", allow_expired=False -- cgit 1.4.1 From 2cd98812ba338eefe83fee4ae2390d00f5499de9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 25 Sep 2019 11:33:03 +0100 Subject: Refactor the user-interactive auth handling (#6105) Pull the checkers out to their own classes, rather than having them lost in a massive 1000-line class which does everything. 
This is also preparation for some more intelligent advertising of flows, as per #6100 --- changelog.d/6105.misc | 1 + synapse/handlers/auth.py | 141 ++------------------- synapse/handlers/ui_auth/__init__.py | 22 ++++ synapse/handlers/ui_auth/checkers.py | 216 ++++++++++++++++++++++++++++++++ tests/rest/client/v2_alpha/test_auth.py | 26 ++-- 5 files changed, 265 insertions(+), 141 deletions(-) create mode 100644 changelog.d/6105.misc create mode 100644 synapse/handlers/ui_auth/__init__.py create mode 100644 synapse/handlers/ui_auth/checkers.py (limited to 'synapse') diff --git a/changelog.d/6105.misc b/changelog.d/6105.misc new file mode 100644 index 0000000000..2e838a35c6 --- /dev/null +++ b/changelog.d/6105.misc @@ -0,0 +1 @@ +Refactor the user-interactive auth handling. diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 374372b69e..f920c2f6c1 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -21,10 +21,8 @@ import unicodedata import attr import bcrypt import pymacaroons -from canonicaljson import json from twisted.internet import defer -from twisted.web.client import PartialDownloadError import synapse.util.stringutils as stringutils from synapse.api.constants import LoginType @@ -38,7 +36,8 @@ from synapse.api.errors import ( UserDeactivatedError, ) from synapse.api.ratelimiting import Ratelimiter -from synapse.config.emailconfig import ThreepidBehaviour +from synapse.handlers.ui_auth import INTERACTIVE_AUTH_CHECKERS +from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.logging.context import defer_to_thread from synapse.module_api import ModuleApi from synapse.types import UserID @@ -58,13 +57,12 @@ class AuthHandler(BaseHandler): hs (synapse.server.HomeServer): """ super(AuthHandler, self).__init__(hs) - self.checkers = { - LoginType.RECAPTCHA: self._check_recaptcha, - LoginType.EMAIL_IDENTITY: self._check_email_identity, - LoginType.MSISDN: self._check_msisdn, - LoginType.DUMMY: self._check_dummy_auth, - LoginType.TERMS: self._check_terms_auth, - } + + self.checkers = {} # type: dict[str, UserInteractiveAuthChecker] + for auth_checker_class in INTERACTIVE_AUTH_CHECKERS: + inst = auth_checker_class(hs) + self.checkers[inst.AUTH_TYPE] = inst + self.bcrypt_rounds = hs.config.bcrypt_rounds # This is not a cache per se, but a store of all current sessions that @@ -292,7 +290,7 @@ class AuthHandler(BaseHandler): sess["creds"] = {} creds = sess["creds"] - result = yield self.checkers[stagetype](authdict, clientip) + result = yield self.checkers[stagetype].check_auth(authdict, clientip) if result: creds[stagetype] = result self._save_session(sess) @@ -363,7 +361,7 @@ class AuthHandler(BaseHandler): login_type = authdict["type"] checker = self.checkers.get(login_type) if checker is not None: - res = yield checker(authdict, clientip=clientip) + res = yield checker.check_auth(authdict, clientip=clientip) return res # build a v1-login-style dict out of the authdict and fall back to the @@ -376,125 +374,6 @@ class AuthHandler(BaseHandler): (canonical_id, callback) = yield self.validate_login(user_id, authdict) return canonical_id - @defer.inlineCallbacks - def _check_recaptcha(self, authdict, clientip, **kwargs): - try: - user_response = authdict["response"] - except KeyError: - # Client tried to provide captcha but didn't give the parameter: - # bad request. 
- raise LoginError( - 400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED - ) - - logger.info( - "Submitting recaptcha response %s with remoteip %s", user_response, clientip - ) - - # TODO: get this from the homeserver rather than creating a new one for - # each request - try: - client = self.hs.get_simple_http_client() - resp_body = yield client.post_urlencoded_get_json( - self.hs.config.recaptcha_siteverify_api, - args={ - "secret": self.hs.config.recaptcha_private_key, - "response": user_response, - "remoteip": clientip, - }, - ) - except PartialDownloadError as pde: - # Twisted is silly - data = pde.response - resp_body = json.loads(data) - - if "success" in resp_body: - # Note that we do NOT check the hostname here: we explicitly - # intend the CAPTCHA to be presented by whatever client the - # user is using, we just care that they have completed a CAPTCHA. - logger.info( - "%s reCAPTCHA from hostname %s", - "Successful" if resp_body["success"] else "Failed", - resp_body.get("hostname"), - ) - if resp_body["success"]: - return True - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) - - def _check_email_identity(self, authdict, **kwargs): - return self._check_threepid("email", authdict, **kwargs) - - def _check_msisdn(self, authdict, **kwargs): - return self._check_threepid("msisdn", authdict) - - def _check_dummy_auth(self, authdict, **kwargs): - return defer.succeed(True) - - def _check_terms_auth(self, authdict, **kwargs): - return defer.succeed(True) - - @defer.inlineCallbacks - def _check_threepid(self, medium, authdict, **kwargs): - if "threepid_creds" not in authdict: - raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) - - threepid_creds = authdict["threepid_creds"] - - identity_handler = self.hs.get_handlers().identity_handler - - logger.info("Getting validated threepid. 
threepidcreds: %r", (threepid_creds,)) - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - if medium == "email": - threepid = yield identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_email, threepid_creds - ) - elif medium == "msisdn": - threepid = yield identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_msisdn, threepid_creds - ) - else: - raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,)) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: - row = yield self.store.get_threepid_validation_session( - medium, - threepid_creds["client_secret"], - sid=threepid_creds["sid"], - validated=True, - ) - - threepid = ( - { - "medium": row["medium"], - "address": row["address"], - "validated_at": row["validated_at"], - } - if row - else None - ) - - if row: - # Valid threepid returned, delete from the db - yield self.store.delete_threepid_session(threepid_creds["sid"]) - else: - raise SynapseError( - 400, "Password resets are not enabled on this homeserver" - ) - - if not threepid: - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) - - if threepid["medium"] != medium: - raise LoginError( - 401, - "Expecting threepid of type '%s', got '%s'" - % (medium, threepid["medium"]), - errcode=Codes.UNAUTHORIZED, - ) - - threepid["threepid_creds"] = authdict["threepid_creds"] - - return threepid - def _get_params_recaptcha(self): return {"public_key": self.hs.config.recaptcha_public_key} diff --git a/synapse/handlers/ui_auth/__init__.py b/synapse/handlers/ui_auth/__init__.py new file mode 100644 index 0000000000..824f37f8f8 --- /dev/null +++ b/synapse/handlers/ui_auth/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module implements user-interactive auth verification. + +TODO: move more stuff out of AuthHandler in here. + +""" + +from synapse.handlers.ui_auth.checkers import INTERACTIVE_AUTH_CHECKERS # noqa: F401 diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py new file mode 100644 index 0000000000..fd633b7b0e --- /dev/null +++ b/synapse/handlers/ui_auth/checkers.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging + +from canonicaljson import json + +from twisted.internet import defer +from twisted.web.client import PartialDownloadError + +from synapse.api.constants import LoginType +from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.config.emailconfig import ThreepidBehaviour + +logger = logging.getLogger(__name__) + + +class UserInteractiveAuthChecker: + """Abstract base class for an interactive auth checker""" + + def __init__(self, hs): + pass + + def check_auth(self, authdict, clientip): + """Given the authentication dict from the client, attempt to check this step + + Args: + authdict (dict): authentication dictionary from the client + clientip (str): The IP address of the client. + + Raises: + SynapseError if authentication failed + + Returns: + Deferred: the result of authentication (to pass back to the client?) + """ + raise NotImplementedError() + + +class DummyAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.DUMMY + + def check_auth(self, authdict, clientip): + return defer.succeed(True) + + +class TermsAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.TERMS + + def check_auth(self, authdict, clientip): + return defer.succeed(True) + + +class RecaptchaAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.RECAPTCHA + + def __init__(self, hs): + super().__init__(hs) + self._http_client = hs.get_simple_http_client() + self._url = hs.config.recaptcha_siteverify_api + self._secret = hs.config.recaptcha_private_key + + @defer.inlineCallbacks + def check_auth(self, authdict, clientip): + try: + user_response = authdict["response"] + except KeyError: + # Client tried to provide captcha but didn't give the parameter: + # bad request. + raise LoginError( + 400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED + ) + + logger.info( + "Submitting recaptcha response %s with remoteip %s", user_response, clientip + ) + + # TODO: get this from the homeserver rather than creating a new one for + # each request + try: + resp_body = yield self._http_client.post_urlencoded_get_json( + self._url, + args={ + "secret": self._secret, + "response": user_response, + "remoteip": clientip, + }, + ) + except PartialDownloadError as pde: + # Twisted is silly + data = pde.response + resp_body = json.loads(data) + + if "success" in resp_body: + # Note that we do NOT check the hostname here: we explicitly + # intend the CAPTCHA to be presented by whatever client the + # user is using, we just care that they have completed a CAPTCHA. + logger.info( + "%s reCAPTCHA from hostname %s", + "Successful" if resp_body["success"] else "Failed", + resp_body.get("hostname"), + ) + if resp_body["success"]: + return True + raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + + +class _BaseThreepidAuthChecker: + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def _check_threepid(self, medium, authdict): + if "threepid_creds" not in authdict: + raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) + + threepid_creds = authdict["threepid_creds"] + + identity_handler = self.hs.get_handlers().identity_handler + + logger.info("Getting validated threepid. 
threepidcreds: %r", (threepid_creds,)) + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if medium == "email": + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + elif medium == "msisdn": + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + else: + raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,)) + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + row = yield self.store.get_threepid_validation_session( + medium, + threepid_creds["client_secret"], + sid=threepid_creds["sid"], + validated=True, + ) + + threepid = ( + { + "medium": row["medium"], + "address": row["address"], + "validated_at": row["validated_at"], + } + if row + else None + ) + + if row: + # Valid threepid returned, delete from the db + yield self.store.delete_threepid_session(threepid_creds["sid"]) + else: + raise SynapseError( + 400, "Password resets are not enabled on this homeserver" + ) + + if not threepid: + raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + + if threepid["medium"] != medium: + raise LoginError( + 401, + "Expecting threepid of type '%s', got '%s'" + % (medium, threepid["medium"]), + errcode=Codes.UNAUTHORIZED, + ) + + threepid["threepid_creds"] = authdict["threepid_creds"] + + return threepid + + +class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker): + AUTH_TYPE = LoginType.EMAIL_IDENTITY + + def __init__(self, hs): + UserInteractiveAuthChecker.__init__(self, hs) + _BaseThreepidAuthChecker.__init__(self, hs) + + def check_auth(self, authdict, clientip): + return self._check_threepid("email", authdict) + + +class MsisdnAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker): + AUTH_TYPE = LoginType.MSISDN + + def __init__(self, hs): + UserInteractiveAuthChecker.__init__(self, hs) + _BaseThreepidAuthChecker.__init__(self, hs) + + def check_auth(self, authdict, clientip): + return self._check_threepid("msisdn", authdict) + + +INTERACTIVE_AUTH_CHECKERS = [ + DummyAuthChecker, + TermsAuthChecker, + RecaptchaAuthChecker, + EmailIdentityAuthChecker, + MsisdnAuthChecker, +] +"""A list of UserInteractiveAuthChecker classes""" diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py index b9ef46e8fb..b6df1396ad 100644 --- a/tests/rest/client/v2_alpha/test_auth.py +++ b/tests/rest/client/v2_alpha/test_auth.py @@ -18,11 +18,22 @@ from twisted.internet.defer import succeed import synapse.rest.admin from synapse.api.constants import LoginType +from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.rest.client.v2_alpha import auth, register from tests import unittest +class DummyRecaptchaChecker(UserInteractiveAuthChecker): + def __init__(self, hs): + super().__init__(hs) + self.recaptcha_attempts = [] + + def check_auth(self, authdict, clientip): + self.recaptcha_attempts.append((authdict, clientip)) + return succeed(True) + + class FallbackAuthTests(unittest.HomeserverTestCase): servlets = [ @@ -44,15 +55,9 @@ class FallbackAuthTests(unittest.HomeserverTestCase): return hs def prepare(self, reactor, clock, hs): + self.recaptcha_checker = DummyRecaptchaChecker(hs) auth_handler = hs.get_auth_handler() - - self.recaptcha_attempts = [] - - def _recaptcha(authdict, clientip): - self.recaptcha_attempts.append((authdict, clientip)) - return succeed(True) - - 
auth_handler.checkers[LoginType.RECAPTCHA] = _recaptcha + auth_handler.checkers[LoginType.RECAPTCHA] = self.recaptcha_checker @unittest.INFO def test_fallback_captcha(self): @@ -89,8 +94,9 @@ class FallbackAuthTests(unittest.HomeserverTestCase): self.assertEqual(request.code, 200) # The recaptcha handler is called with the response given - self.assertEqual(len(self.recaptcha_attempts), 1) - self.assertEqual(self.recaptcha_attempts[0][0]["response"], "a") + attempts = self.recaptcha_checker.recaptcha_attempts + self.assertEqual(len(attempts), 1) + self.assertEqual(attempts[0][0]["response"], "a") # also complete the dummy auth request, channel = self.make_request( -- cgit 1.4.1 From 5d99713854ee0672ba95d72ef13ce1cbcbc781c5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:39:15 +0100 Subject: Add tags for event_id and txn_id in event sending This will make it easier to search for sending event requests. --- synapse/rest/client/v1/room.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'synapse') diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a6a7b3b57e..19f150af9d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -44,6 +44,7 @@ from synapse.rest.client.v2_alpha._base import client_patterns from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID +from synapse.logging.opentracing import set_tag logger = logging.getLogger(__name__) @@ -81,6 +82,7 @@ class RoomCreateRestServlet(TransactionRestServlet): ) def on_PUT(self, request, txn_id): + set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request(request, self.on_POST, request) @defer.inlineCallbacks @@ -181,6 +183,9 @@ class RoomStateEventRestServlet(TransactionRestServlet): def on_PUT(self, request, room_id, event_type, state_key, txn_id=None): requester = yield self.auth.get_user_by_req(request) + if txn_id: + set_tag("txn_id", txn_id) + content = parse_json_object_from_request(request) event_dict = { @@ -209,6 +214,7 @@ class RoomStateEventRestServlet(TransactionRestServlet): ret = {} if event: + set_tag("event_id", event.event_id) ret = {"event_id": event.event_id} return 200, ret @@ -244,12 +250,15 @@ class RoomSendEventRestServlet(TransactionRestServlet): requester, event_dict, txn_id=txn_id ) + set_tag("event_id", event.event_id) return 200, {"event_id": event.event_id} def on_GET(self, request, room_id, event_type, txn_id): return 200, "Not implemented" def on_PUT(self, request, room_id, event_type, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, event_type, txn_id ) @@ -310,6 +319,8 @@ class JoinRoomAliasServlet(TransactionRestServlet): return 200, {"room_id": room_id} def on_PUT(self, request, room_identifier, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_identifier, txn_id ) @@ -655,6 +666,8 @@ class RoomForgetRestServlet(TransactionRestServlet): return 200, {} def on_PUT(self, request, room_id, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, txn_id ) @@ -738,6 +751,8 @@ class RoomMembershipRestServlet(TransactionRestServlet): return True def on_PUT(self, request, room_id, membership_action, txn_id): + set_tag("txn_id", txn_id) + return 
self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, membership_action, txn_id ) @@ -771,9 +786,12 @@ class RoomRedactEventRestServlet(TransactionRestServlet): txn_id=txn_id, ) + set_tag("event_id", event.event_id) return 200, {"event_id": event.event_id} def on_PUT(self, request, room_id, event_id, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, event_id, txn_id ) -- cgit 1.4.1 From dc01cad690e3c6cb1ccb57995554dd93ab1636f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:59:00 +0100 Subject: Add device and appservice tags --- synapse/api/auth.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 59852bdbdb..cb50579fd2 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -211,6 +211,7 @@ class Auth(object): if user_id: request.authenticated_entity = user_id opentracing.set_tag("authenticated_entity", user_id) + opentracing.set_tag("appservice_id", app_service.id) if ip_addr and self.hs.config.track_appservice_user_ips: yield self.store.insert_client_ip( @@ -262,6 +263,8 @@ class Auth(object): request.authenticated_entity = user.to_string() opentracing.set_tag("authenticated_entity", user.to_string()) + if device_id: + opentracing.set_tag("device_id", device_id) return synapse.types.create_requester( user, token_id, is_guest, device_id, app_service=app_service -- cgit 1.4.1 From dc2c97e1a36cc3c2f584223a0d8a3faa810471c0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:59:05 +0100 Subject: isort --- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 19f150af9d..6bf924dedc 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -39,12 +39,12 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.logging.opentracing import set_tag from synapse.rest.client.transactions import HttpTransactionCache from synapse.rest.client.v2_alpha._base import client_patterns from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID -from synapse.logging.opentracing import set_tag logger = logging.getLogger(__name__) -- cgit 1.4.1 From 990928abde4f3ccd7d43e6214abd7d36434953a9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 25 Sep 2019 12:10:26 +0100 Subject: Stop advertising unsupported flows for registration (#6107) If email or msisdn verification aren't supported, let's stop advertising them for registration. Fixes #6100. 
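Note: the practical effect is easiest to see from the client side. On a server with neither email nor msisdn verification configured, the 401 response from POST /_matrix/client/r0/register now only advertises the dummy flow, per the updated test below. A sketch of the relevant part of that response body (other fields such as "session" and "params" are elided):

register_401_body = {
    "flows": [
        {"stages": ["m.login.dummy"]},
    ],
}

If the config *requires* a 3pid whose verification is not configured, the new code raises a ConfigError when the servlet is constructed instead of advertising a flow the user could never complete.
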
--- changelog.d/6107.bugfix | 1 + synapse/handlers/auth.py | 11 +++++++++- synapse/handlers/ui_auth/checkers.py | 26 +++++++++++++++++++++++ synapse/rest/client/v2_alpha/register.py | 32 ++++++++++++++++++++++++++--- tests/rest/client/v2_alpha/test_register.py | 29 +++++++++++++++----------- 5 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 changelog.d/6107.bugfix (limited to 'synapse') diff --git a/changelog.d/6107.bugfix b/changelog.d/6107.bugfix new file mode 100644 index 0000000000..d4b9516ac7 --- /dev/null +++ b/changelog.d/6107.bugfix @@ -0,0 +1 @@ +Ensure that servers which are not configured to support email address verification do not offer it in the registration flows. \ No newline at end of file diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f920c2f6c1..333eb30625 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -61,7 +61,8 @@ class AuthHandler(BaseHandler): self.checkers = {} # type: dict[str, UserInteractiveAuthChecker] for auth_checker_class in INTERACTIVE_AUTH_CHECKERS: inst = auth_checker_class(hs) - self.checkers[inst.AUTH_TYPE] = inst + if inst.is_enabled(): + self.checkers[inst.AUTH_TYPE] = inst self.bcrypt_rounds = hs.config.bcrypt_rounds @@ -156,6 +157,14 @@ class AuthHandler(BaseHandler): return params + def get_enabled_auth_types(self): + """Return the enabled user-interactive authentication types + + Returns the UI-Auth types which are supported by the homeserver's current + config. + """ + return self.checkers.keys() + @defer.inlineCallbacks def check_auth(self, flows, clientdict, clientip): """ diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index fd633b7b0e..ee69223243 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -32,6 +32,13 @@ class UserInteractiveAuthChecker: def __init__(self, hs): pass + def is_enabled(self): + """Check if the configuration of the homeserver allows this checker to work + + Returns: + bool: True if this login type is enabled. 
+ """ + def check_auth(self, authdict, clientip): """Given the authentication dict from the client, attempt to check this step @@ -51,6 +58,9 @@ class UserInteractiveAuthChecker: class DummyAuthChecker(UserInteractiveAuthChecker): AUTH_TYPE = LoginType.DUMMY + def is_enabled(self): + return True + def check_auth(self, authdict, clientip): return defer.succeed(True) @@ -58,6 +68,9 @@ class DummyAuthChecker(UserInteractiveAuthChecker): class TermsAuthChecker(UserInteractiveAuthChecker): AUTH_TYPE = LoginType.TERMS + def is_enabled(self): + return True + def check_auth(self, authdict, clientip): return defer.succeed(True) @@ -67,10 +80,14 @@ class RecaptchaAuthChecker(UserInteractiveAuthChecker): def __init__(self, hs): super().__init__(hs) + self._enabled = bool(hs.config.recaptcha_private_key) self._http_client = hs.get_simple_http_client() self._url = hs.config.recaptcha_siteverify_api self._secret = hs.config.recaptcha_private_key + def is_enabled(self): + return self._enabled + @defer.inlineCallbacks def check_auth(self, authdict, clientip): try: @@ -191,6 +208,12 @@ class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChec UserInteractiveAuthChecker.__init__(self, hs) _BaseThreepidAuthChecker.__init__(self, hs) + def is_enabled(self): + return self.hs.config.threepid_behaviour_email in ( + ThreepidBehaviour.REMOTE, + ThreepidBehaviour.LOCAL, + ) + def check_auth(self, authdict, clientip): return self._check_threepid("email", authdict) @@ -202,6 +225,9 @@ class MsisdnAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker): UserInteractiveAuthChecker.__init__(self, hs) _BaseThreepidAuthChecker.__init__(self, hs) + def is_enabled(self): + return bool(self.hs.config.account_threepid_delegate_msisdn) + def check_auth(self, authdict, clientip): return self._check_threepid("msisdn", authdict) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index e3f3d9126f..4f24a124a6 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -32,12 +32,14 @@ from synapse.api.errors import ( ThreepidValidationError, UnrecognizedRequestError, ) +from synapse.config import ConfigError from synapse.config.captcha import CaptchaConfig from synapse.config.consent_config import ConsentConfig from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.ratelimiting import FederationRateLimitConfig from synapse.config.registration import RegistrationConfig from synapse.config.server import is_threepid_reserved +from synapse.handlers.auth import AuthHandler from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -375,7 +377,9 @@ class RegisterRestServlet(RestServlet): self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() - self._registration_flows = _calculate_registration_flows(hs.config) + self._registration_flows = _calculate_registration_flows( + hs.config, self.auth_handler + ) @interactive_auth_handler @defer.inlineCallbacks @@ -664,11 +668,13 @@ class RegisterRestServlet(RestServlet): def _calculate_registration_flows( # technically `config` has to provide *all* of these interfaces, not just one config: Union[RegistrationConfig, ConsentConfig, CaptchaConfig], + auth_handler: AuthHandler, ) -> List[List[str]]: """Get a suitable flows list for registration Args: config: server configuration + auth_handler: authorization handler Returns: a list of supported flows """ @@ -678,10 +684,29 @@ def 
_calculate_registration_flows( require_msisdn = "msisdn" in config.registrations_require_3pid show_msisdn = True + show_email = True + if config.disable_msisdn_registration: show_msisdn = False require_msisdn = False + enabled_auth_types = auth_handler.get_enabled_auth_types() + if LoginType.EMAIL_IDENTITY not in enabled_auth_types: + show_email = False + if require_email: + raise ConfigError( + "Configuration requires email address at registration, but email " + "validation is not configured" + ) + + if LoginType.MSISDN not in enabled_auth_types: + show_msisdn = False + if require_msisdn: + raise ConfigError( + "Configuration requires msisdn at registration, but msisdn " + "validation is not configured" + ) + flows = [] # only support 3PIDless registration if no 3PIDs are required @@ -693,14 +718,15 @@ def _calculate_registration_flows( flows.append([LoginType.DUMMY]) # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: + if show_email and not require_msisdn: flows.append([LoginType.EMAIL_IDENTITY]) # only support the MSISDN-only flow if we don't require email 3PIDs if show_msisdn and not require_email: flows.append([LoginType.MSISDN]) - if show_msisdn: + if show_email and show_msisdn: + # always let users provide both MSISDN & email flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY]) # Prepend m.login.terms to all flows if we're requiring consent diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index bc2dc47973..dab87e5edf 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -198,16 +198,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"401", channel.result) flows = channel.json_body["flows"] - # with the stock config, we expect all four combinations of 3pid - self.assertCountEqual( - [ - ["m.login.dummy"], - ["m.login.email.identity"], - ["m.login.msisdn"], - ["m.login.msisdn", "m.login.email.identity"], - ], - (f["stages"] for f in flows), - ) + # with the stock config, we only expect the dummy flow + self.assertCountEqual([["m.login.dummy"]], (f["stages"] for f in flows)) @unittest.override_config( { @@ -217,9 +209,13 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "template_dir": "/", "require_at_registration": True, }, + "account_threepid_delegates": { + "email": "https://id_server", + "msisdn": "https://id_server", + }, } ) - def test_advertised_flows_captcha_and_terms(self): + def test_advertised_flows_captcha_and_terms_and_3pids(self): request, channel = self.make_request(b"POST", self.url, b"{}") self.render(request) self.assertEquals(channel.result["code"], b"401", channel.result) @@ -241,7 +237,16 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) @unittest.override_config( - {"registrations_require_3pid": ["email"], "disable_msisdn_registration": True} + { + "public_baseurl": "https://test_server", + "registrations_require_3pid": ["email"], + "disable_msisdn_registration": True, + "email": { + "smtp_host": "mail_server", + "smtp_port": 2525, + "notif_from": "sender@host", + }, + } ) def test_advertised_flows_no_msisdn_email_required(self): request, channel = self.make_request(b"POST", self.url, b"{}") -- cgit 1.4.1 From 77dc7093a738ec4e172c92b7a53d58aa41bfec0a Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 25 Sep 2019 12:29:35 +0100 Subject: Threepid validity checks on msisdns should not be dependent on 
'threepid_behaviour_email'. (#6104) Fixes #6103 --- changelog.d/6104.bugfix | 1 + synapse/handlers/ui_auth/checkers.py | 63 +++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 29 deletions(-) create mode 100644 changelog.d/6104.bugfix (limited to 'synapse') diff --git a/changelog.d/6104.bugfix b/changelog.d/6104.bugfix new file mode 100644 index 0000000000..41114a66ef --- /dev/null +++ b/changelog.d/6104.bugfix @@ -0,0 +1 @@ +Threepid validity checks on msisdns should not be dependent on 'threepid_behaviour_email'. diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index ee69223243..29aa1e5aaf 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -148,42 +148,47 @@ class _BaseThreepidAuthChecker: identity_handler = self.hs.get_handlers().identity_handler logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - if medium == "email": + + # msisdns are currently always ThreepidBehaviour.REMOTE + if medium == "msisdn": + if not self.hs.config.account_threepid_delegate_msisdn: + raise SynapseError( + 400, "Phone number verification is not enabled on this homeserver" + ) + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + elif medium == "email": + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.account_threepid_delegate_email threepid = yield identity_handler.threepid_from_creds( self.hs.config.account_threepid_delegate_email, threepid_creds ) - elif medium == "msisdn": - threepid = yield identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_msisdn, threepid_creds + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + threepid = None + row = yield self.store.get_threepid_validation_session( + medium, + threepid_creds["client_secret"], + sid=threepid_creds["sid"], + validated=True, ) - else: - raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,)) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: - row = yield self.store.get_threepid_validation_session( - medium, - threepid_creds["client_secret"], - sid=threepid_creds["sid"], - validated=True, - ) - threepid = ( - { - "medium": row["medium"], - "address": row["address"], - "validated_at": row["validated_at"], - } - if row - else None - ) + if row: + threepid = { + "medium": row["medium"], + "address": row["address"], + "validated_at": row["validated_at"], + } - if row: - # Valid threepid returned, delete from the db - yield self.store.delete_threepid_session(threepid_creds["sid"]) + # Valid threepid returned, delete from the db + yield self.store.delete_threepid_session(threepid_creds["sid"]) + else: + raise SynapseError( + 400, "Email address verification is not enabled on this homeserver" + ) else: - raise SynapseError( - 400, "Password resets are not enabled on this homeserver" - ) + # this can't happen! 
+ raise AssertionError("Unrecognized threepid medium: %s" % (medium,)) if not threepid: raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) -- cgit 1.4.1 From 50572db837f3e6a0869e9ec573e02d4af72548ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 17:00:23 +0100 Subject: Use if `is not None` Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/client_ips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a4e6d9dbe7..8996689744 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -393,7 +393,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): """ keyvalues = {"user_id": user_id} - if device_id: + if device_id is not None: keyvalues["device_id"] = device_id res = yield self._simple_select_list( -- cgit 1.4.1 From 39b50ad42a8cf784e627959e9652589338121ccd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 17:22:33 +0100 Subject: Review comments --- docs/sample_config.yaml | 2 +- synapse/config/server.py | 2 +- synapse/storage/background_updates.py | 5 +---- synapse/storage/client_ips.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index cc6035c838..7902d9ed6f 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -315,7 +315,7 @@ redaction_retention_period: 7d # How long to track users' last seen time and IPs in the database. # -# Defaults to `28d`. Set to `null` to disable. +# Defaults to `28d`. Set to `null` to disable clearing out of old rows. # #user_ips_max_age: 14d diff --git a/synapse/config/server.py b/synapse/config/server.py index 655e7487a4..f8b7b4bef9 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -745,7 +745,7 @@ class ServerConfig(Config): # How long to track users' last seen time and IPs in the database. # - # Defaults to `28d`. Set to `null` to disable. + # Defaults to `28d`. Set to `null` to disable clearing out of old rows. # #user_ips_max_age: 14d """ diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 3fc25cd828..30788137a8 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -148,11 +148,8 @@ class BackgroundUpdateStore(SQLBaseStore): return False - async def has_completed_background_update(self, update_name): + async def has_completed_background_update(self, update_name) -> bool: """Check if the given background update has finished running. - - Returns: - Deferred[bool] """ if self._all_done: diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 176c812b1f..a4d40dfa1e 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -506,7 +506,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): """Removes entries in user IPs older than the configured period. """ - if not self.user_ips_max_age: + if self.user_ips_max_age is None: # Nothing to do return -- cgit 1.4.1 From a4f3ca48b5250a1c2c4de8a363f69bbeb0adeefd Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 25 Sep 2019 17:27:35 +0100 Subject: Enable cleaning up extremities with dummy events by default to prevent undue build up of forward extremities. 
(#5884) --- changelog.d/5884.feature | 1 + synapse/config/server.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5884.feature (limited to 'synapse') diff --git a/changelog.d/5884.feature b/changelog.d/5884.feature new file mode 100644 index 0000000000..bfd0489392 --- /dev/null +++ b/changelog.d/5884.feature @@ -0,0 +1 @@ +Enable cleaning up extremities with dummy events by default to prevent undue build up of forward extremities. diff --git a/synapse/config/server.py b/synapse/config/server.py index 419787a89c..3a7a49bc91 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -355,10 +355,8 @@ class ServerConfig(Config): _check_resource_config(self.listeners) - # An experimental option to try and periodically clean up extremities - # by sending dummy events. self.cleanup_extremities_with_dummy_events = config.get( - "cleanup_extremities_with_dummy_events", False + "cleanup_extremities_with_dummy_events", True ) def has_tls_listener(self): -- cgit 1.4.1 From a96318127dc17ee102bcf90821d90b7e6079a85d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Sep 2019 18:17:39 +0100 Subject: Update comments and docstring --- synapse/metrics/background_process_metrics.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index b24e2fab4a..c53d2a0d40 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -175,7 +175,7 @@ def run_as_background_process(desc, func, *args, **kwargs): Args: desc (str): a description for this background process type - func: a function, which may return a Deferred + func: a function, which may return a Deferred or a coroutine args: positional args for func kwargs: keyword args for func @@ -199,11 +199,13 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes.setdefault(desc, set()).add(proc) try: - # We ensureDeferred here to handle coroutines result = func(*args, **kwargs) - # We need this check because ensureDeferred doesn't like when - # func doesn't return a Deferred or coroutine. + # We probably don't have an ensureDeferred in our call stack to handle + # coroutine results, so we need to ensureDeferred here. + # + # But we need this check because ensureDeferred doesn't like being + # called on immediate values (as opposed to Deferreds or coroutines). if iscoroutine(result): result = defer.ensureDeferred(result) -- cgit 1.4.1 From 034db2ba2115d935ce62b641b4051e477a454eac Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 26 Sep 2019 11:47:53 +0100 Subject: Fix dummy event insertion consent bug (#6053) Fixes #5905 --- changelog.d/6053.bugfix | 1 + synapse/handlers/message.py | 99 ++++++++++++++++------ synapse/storage/event_federation.py | 18 +++- tests/storage/test_cleanup_extrems.py | 147 +++++++++++++++++++++++++++++++-- tests/storage/test_event_federation.py | 40 +++++++++ 5 files changed, 266 insertions(+), 39 deletions(-) create mode 100644 changelog.d/6053.bugfix (limited to 'synapse') diff --git a/changelog.d/6053.bugfix b/changelog.d/6053.bugfix new file mode 100644 index 0000000000..6311157bf6 --- /dev/null +++ b/changelog.d/6053.bugfix @@ -0,0 +1 @@ +Prevent exceptions being logged when extremity-cleanup events fail due to lack of user consent to the terms of service. 
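The handlers/message.py diff below implements the fix with a time-limited exclusion map: a room in which no local user can currently send the dummy event (for example because of missing consent or insufficient power) is skipped for `_DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY` milliseconds and then retried. The following standalone sketch illustrates the same pattern; the class and method names are hypothetical and this is not the code added by the patch.

    import time


    class ExpiringExclusionCache:
        """Track room IDs to skip, forgetting them after `expiry_ms` milliseconds.

        Hypothetical illustration of the exclusion map used by the dummy-event
        insertion loop; not the actual Synapse implementation.
        """

        def __init__(self, expiry_ms=7 * 24 * 60 * 60 * 1000):
            self._expiry_ms = expiry_ms
            self._excluded = {}  # room_id -> time of last failed attempt, in ms

        def exclude(self, room_id):
            # Record the failure time so the room is retried once the entry expires.
            self._excluded[room_id] = int(time.time() * 1000)

        def expire_old_entries(self):
            # Drop entries older than the expiry window so those rooms get rechecked.
            expire_before = int(time.time() * 1000) - self._expiry_ms
            for room_id in [r for r, t in self._excluded.items() if t < expire_before]:
                del self._excluded[room_id]

        def excluded_room_ids(self):
            return set(self._excluded)

In the patch itself the excluded room IDs are fed to `get_rooms_with_many_extremities` via the new `room_id_filter` argument, so excluded rooms are not reconsidered until their entries expire.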
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1f8272784e..0f8cce8ffe 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -222,6 +222,13 @@ class MessageHandler(object): } +# The duration (in ms) after which rooms should be removed +# `_rooms_to_exclude_from_dummy_event_insertion` (with the effect that we will try +# to generate a dummy event for them once more) +# +_DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY = 7 * 24 * 60 * 60 * 1000 + + class EventCreationHandler(object): def __init__(self, hs): self.hs = hs @@ -258,6 +265,13 @@ class EventCreationHandler(object): self.config.block_events_without_consent_error ) + # Rooms which should be excluded from dummy insertion. (For instance, + # those without local users who can send events into the room). + # + # map from room id to time-of-last-attempt. + # + self._rooms_to_exclude_from_dummy_event_insertion = {} # type: dict[str, int] + # we need to construct a ConsentURIBuilder here, as it checks that the necessary # config options, but *only* if we have a configuration for which we are # going to need it. @@ -888,9 +902,11 @@ class EventCreationHandler(object): """Background task to send dummy events into rooms that have a large number of extremities """ - + self._expire_rooms_to_exclude_from_dummy_event_insertion() room_ids = yield self.store.get_rooms_with_many_extremities( - min_count=10, limit=5 + min_count=10, + limit=5, + room_id_filter=self._rooms_to_exclude_from_dummy_event_insertion.keys(), ) for room_id in room_ids: @@ -904,32 +920,61 @@ class EventCreationHandler(object): members = yield self.state.get_current_users_in_room( room_id, latest_event_ids=latest_event_ids ) + dummy_event_sent = False + for user_id in members: + if not self.hs.is_mine_id(user_id): + continue + requester = create_requester(user_id) + try: + event, context = yield self.create_event( + requester, + { + "type": "org.matrix.dummy_event", + "content": {}, + "room_id": room_id, + "sender": user_id, + }, + prev_events_and_hashes=prev_events_and_hashes, + ) - user_id = None - for member in members: - if self.hs.is_mine_id(member): - user_id = member - break - - if not user_id: - # We don't have a joined user. - # TODO: We should do something here to stop the room from - # appearing next time. - continue + event.internal_metadata.proactively_send = False - requester = create_requester(user_id) + yield self.send_nonmember_event( + requester, event, context, ratelimit=False + ) + dummy_event_sent = True + break + except ConsentNotGivenError: + logger.info( + "Failed to send dummy event into room %s for user %s due to " + "lack of consent. Will try another user" % (room_id, user_id) + ) + except AuthError: + logger.info( + "Failed to send dummy event into room %s for user %s due to " + "lack of power. Will try another user" % (room_id, user_id) + ) - event, context = yield self.create_event( - requester, - { - "type": "org.matrix.dummy_event", - "content": {}, - "room_id": room_id, - "sender": user_id, - }, - prev_events_and_hashes=prev_events_and_hashes, + if not dummy_event_sent: + # Did not find a valid user in the room, so remove from future attempts + # Exclusion is time limited, so the room will be rechecked in the future + # dependent on _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY + logger.info( + "Failed to send dummy event into room %s. 
Will exclude it from " + "future attempts until cache expires" % (room_id,) + ) + now = self.clock.time_msec() + self._rooms_to_exclude_from_dummy_event_insertion[room_id] = now + + def _expire_rooms_to_exclude_from_dummy_event_insertion(self): + expire_before = self.clock.time_msec() - _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY + to_expire = set() + for room_id, time in self._rooms_to_exclude_from_dummy_event_insertion.items(): + if time < expire_before: + to_expire.add(room_id) + for room_id in to_expire: + logger.debug( + "Expiring room id %s from dummy event insertion exclusion cache", + room_id, ) - - event.internal_metadata.proactively_send = False - - yield self.send_nonmember_event(requester, event, context, ratelimit=False) + del self._rooms_to_exclude_from_dummy_event_insertion[room_id] diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 4f500d893e..f5e8c39262 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging import random @@ -190,12 +191,13 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas room_id, ) - def get_rooms_with_many_extremities(self, min_count, limit): + def get_rooms_with_many_extremities(self, min_count, limit, room_id_filter): """Get the top rooms with at least N extremities. Args: min_count (int): The minimum number of extremities limit (int): The maximum number of rooms to return. + room_id_filter (iterable[str]): room_ids to exclude from the results Returns: Deferred[list]: At most `limit` room IDs that have at least @@ -203,15 +205,25 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas """ def _get_rooms_with_many_extremities_txn(txn): + where_clause = "1=1" + if room_id_filter: + where_clause = "room_id NOT IN (%s)" % ( + ",".join("?" for _ in room_id_filter), + ) + sql = """ SELECT room_id FROM event_forward_extremities + WHERE %s GROUP BY room_id HAVING count(*) > ? ORDER BY count(*) DESC LIMIT ? - """ + """ % ( + where_clause, + ) - txn.execute(sql, (min_count, limit)) + query_args = list(itertools.chain(room_id_filter, [min_count, limit])) + txn.execute(sql, query_args) return [room_id for room_id, in txn] return self.runInteraction( diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py index e9e2d5337c..34f9c72709 100644 --- a/tests/storage/test_cleanup_extrems.py +++ b/tests/storage/test_cleanup_extrems.py @@ -14,7 +14,13 @@ # limitations under the License. 
import os.path +from unittest.mock import patch +from mock import Mock + +import synapse.rest.admin +from synapse.api.constants import EventTypes +from synapse.rest.client.v1 import login, room from synapse.storage import prepare_database from synapse.types import Requester, UserID @@ -225,6 +231,14 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): class CleanupExtremDummyEventsTestCase(HomeserverTestCase): + CONSENT_VERSION = "1" + EXTREMITIES_COUNT = 50 + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + def make_homeserver(self, reactor, clock): config = self.default_config() config["cleanup_extremities_with_dummy_events"] = True @@ -233,33 +247,148 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase): def prepare(self, reactor, clock, homeserver): self.store = homeserver.get_datastore() self.room_creator = homeserver.get_room_creation_handler() + self.event_creator_handler = homeserver.get_event_creation_handler() # Create a test user and room - self.user = UserID("alice", "test") + self.user = UserID.from_string(self.register_user("user1", "password")) + self.token1 = self.login("user1", "password") self.requester = Requester(self.user, None, False, None, None) info = self.get_success(self.room_creator.create_room(self.requester, {})) self.room_id = info["room_id"] + self.event_creator = homeserver.get_event_creation_handler() + homeserver.config.user_consent_version = self.CONSENT_VERSION def test_send_dummy_event(self): - # Create a bushy graph with 50 extremities. - - event_id_start = self.create_and_send_event(self.room_id, self.user) + self._create_extremity_rich_graph() - for _ in range(50): - self.create_and_send_event( - self.room_id, self.user, prev_event_ids=[event_id_start] - ) + # Pump the reactor repeatedly so that the background updates have a + # chance to run. + self.pump(10 * 60) latest_event_ids = self.get_success( self.store.get_latest_event_ids_in_room(self.room_id) ) - self.assertEqual(len(latest_event_ids), 50) + self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids)) + @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0) + def test_send_dummy_events_when_insufficient_power(self): + self._create_extremity_rich_graph() + # Criple power levels + self.helper.send_state( + self.room_id, + EventTypes.PowerLevels, + body={"users": {str(self.user): -1}}, + tok=self.token1, + ) # Pump the reactor repeatedly so that the background updates have a # chance to run. self.pump(10 * 60) + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + # Check that the room has not been pruned + self.assertTrue(len(latest_event_ids) > 10) + + # New user with regular levels + user2 = self.register_user("user2", "password") + token2 = self.login("user2", "password") + self.helper.join(self.room_id, user2, tok=token2) + self.pump(10 * 60) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids)) + + @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0) + def test_send_dummy_event_without_consent(self): + self._create_extremity_rich_graph() + self._enable_consent_checking() + + # Pump the reactor repeatedly so that the background updates have a + # chance to run. Attempt to add dummy event with user that has not consented + # Check that dummy event send fails. 
+ self.pump(10 * 60) + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertTrue(len(latest_event_ids) == self.EXTREMITIES_COUNT) + + # Create new user, and add consent + user2 = self.register_user("user2", "password") + token2 = self.login("user2", "password") + self.get_success( + self.store.user_set_consent_version(user2, self.CONSENT_VERSION) + ) + self.helper.join(self.room_id, user2, tok=token2) + + # Background updates should now cause a dummy event to be added to the graph + self.pump(10 * 60) + latest_event_ids = self.get_success( self.store.get_latest_event_ids_in_room(self.room_id) ) self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids)) + + @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=250) + def test_expiry_logic(self): + """Simple test to ensure that _expire_rooms_to_exclude_from_dummy_event_insertion() + expires old entries correctly. + """ + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[ + "1" + ] = 100000 + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[ + "2" + ] = 200000 + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[ + "3" + ] = 300000 + self.event_creator_handler._expire_rooms_to_exclude_from_dummy_event_insertion() + # All entries within time frame + self.assertEqual( + len( + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion + ), + 3, + ) + # Oldest room to expire + self.pump(1) + self.event_creator_handler._expire_rooms_to_exclude_from_dummy_event_insertion() + self.assertEqual( + len( + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion + ), + 2, + ) + # All rooms to expire + self.pump(2) + self.assertEqual( + len( + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion + ), + 0, + ) + + def _create_extremity_rich_graph(self): + """Helper method to create bushy graph on demand""" + + event_id_start = self.create_and_send_event(self.room_id, self.user) + + for _ in range(self.EXTREMITIES_COUNT): + self.create_and_send_event( + self.room_id, self.user, prev_event_ids=[event_id_start] + ) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(len(latest_event_ids), 50) + + def _enable_consent_checking(self): + """Helper method to enable consent checking""" + self.event_creator._block_events_without_consent_error = "No consent from user" + consent_uri_builder = Mock() + consent_uri_builder.build_user_consent_uri.return_value = "http://example.com" + self.event_creator._consent_uri_builder = consent_uri_builder diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 86c7ac350d..b58386994e 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -75,3 +75,43 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase): el = r[i] depth = el[2] self.assertLessEqual(5, depth) + + @defer.inlineCallbacks + def test_get_rooms_with_many_extremities(self): + room1 = "#room1" + room2 = "#room2" + room3 = "#room3" + + def insert_event(txn, i, room_id): + event_id = "$event_%i:local" % i + txn.execute( + ( + "INSERT INTO event_forward_extremities (room_id, event_id) " + "VALUES (?, ?)" + ), + (room_id, event_id), + ) + + for i in range(0, 20): + yield self.store.runInteraction("insert", insert_event, i, room1) + yield self.store.runInteraction("insert", insert_event, i, room2) + yield 
self.store.runInteraction("insert", insert_event, i, room3) + + # Test simple case + r = yield self.store.get_rooms_with_many_extremities(5, 5, []) + self.assertEqual(len(r), 3) + + # Does filter work? + + r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1]) + self.assertTrue(room2 in r) + self.assertTrue(room3 in r) + self.assertEqual(len(r), 2) + + r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1, room2]) + self.assertEqual(r, [room3]) + + # Does filter and limit work? + + r = yield self.store.get_rooms_with_many_extremities(5, 1, [room1]) + self.assertTrue(r == [room2] or r == [room3]) -- cgit 1.4.1 From 2927c6bc4c4e0c975a875d7eb5aa736b6abd66cd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Sep 2019 12:29:59 +0100 Subject: bump version --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/__init__.py b/synapse/__init__.py index 6766ef445c..ddfe9ec542 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.3.1" +__version__ = "1.4.0rc1" -- cgit 1.4.1 From 8b8f8c7b3c6136ea777265fff8052afed2b7031e Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 26 Sep 2019 12:57:01 +0100 Subject: Explicitly log when a homeserver does not have a trusted key server configured (#6090) --- changelog.d/6090.feature | 1 + docs/sample_config.yaml | 14 ++++++++++---- synapse/config/key.py | 48 ++++++++++++++++++++++++++++++++++++++++++++---- synapse/config/server.py | 16 ++++++++-------- 4 files changed, 63 insertions(+), 16 deletions(-) create mode 100644 changelog.d/6090.feature (limited to 'synapse') diff --git a/changelog.d/6090.feature b/changelog.d/6090.feature new file mode 100644 index 0000000000..a6da448a1a --- /dev/null +++ b/changelog.d/6090.feature @@ -0,0 +1 @@ +Explicitly log when a homeserver does not have the 'trusted_key_servers' config field configured. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 8f801daf35..254e1b17b4 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1072,6 +1072,10 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # This setting supercedes an older setting named `perspectives`. The old format # is still supported for backwards-compatibility, but it is deprecated. # +# 'trusted_key_servers' defaults to matrix.org, but using it will generate a +# warning on start-up. To suppress this warning, set +# 'suppress_key_server_warning' to true. +# # Options for each entry in the list include: # # server_name: the name of the server. required. @@ -1096,11 +1100,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # "ed25519:auto": "abcdefghijklmnopqrstuvwxyzabcdefghijklmopqr" # - server_name: "my_other_trusted_server.example.com" # -# The default configuration is: -# -#trusted_key_servers: -# - server_name: "matrix.org" +trusted_key_servers: + - server_name: "matrix.org" + +# Uncomment the following to disable the warning that is emitted when the +# trusted_key_servers include 'matrix.org'. See above. # +#suppress_key_server_warning: true # The signing keys to use when acting as a trusted key server. If not specified # defaults to the server signing key. diff --git a/synapse/config/key.py b/synapse/config/key.py index ba2199bceb..f039f96e9c 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -50,6 +50,33 @@ and you should enable 'federation_verify_certificates' in your configuration. 
If you are *sure* you want to do this, set 'accept_keys_insecurely' on the trusted_key_server configuration.""" +TRUSTED_KEY_SERVER_NOT_CONFIGURED_WARN = """\ +Synapse requires that a list of trusted key servers are specified in order to +provide signing keys for other servers in the federation. + +This homeserver does not have a trusted key server configured in +homeserver.yaml and will fall back to the default of 'matrix.org'. + +Trusted key servers should be long-lived and stable which makes matrix.org a +good choice for many admins, but some admins may wish to choose another. To +suppress this warning, the admin should set 'trusted_key_servers' in +homeserver.yaml to their desired key server and 'suppress_key_server_warning' +to 'true'. + +In a future release the software-defined default will be removed entirely and +the trusted key server will be defined exclusively by the value of +'trusted_key_servers'. +--------------------------------------------------------------------------------""" + +TRUSTED_KEY_SERVER_CONFIGURED_AS_M_ORG_WARN = """\ +This server is configured to use 'matrix.org' as its trusted key server via the +'trusted_key_servers' config option. 'matrix.org' is a good choice for a key +server since it is long-lived, stable and trusted. However, some admins may +wish to use another server for this purpose. + +To suppress this warning and continue using 'matrix.org', admins should set +'suppress_key_server_warning' to 'true' in homeserver.yaml. +--------------------------------------------------------------------------------""" logger = logging.getLogger(__name__) @@ -85,6 +112,7 @@ class KeyConfig(Config): config.get("key_refresh_interval", "1d") ) + suppress_key_server_warning = config.get("suppress_key_server_warning", False) key_server_signing_keys_path = config.get("key_server_signing_keys_path") if key_server_signing_keys_path: self.key_server_signing_keys = self.read_signing_keys( @@ -95,6 +123,7 @@ class KeyConfig(Config): # if neither trusted_key_servers nor perspectives are given, use the default. if "perspectives" not in config and "trusted_key_servers" not in config: + logger.warn(TRUSTED_KEY_SERVER_NOT_CONFIGURED_WARN) key_servers = [{"server_name": "matrix.org"}] else: key_servers = config.get("trusted_key_servers", []) @@ -108,6 +137,11 @@ class KeyConfig(Config): # merge the 'perspectives' config into the 'trusted_key_servers' config. key_servers.extend(_perspectives_to_key_servers(config)) + if not suppress_key_server_warning and "matrix.org" in ( + s["server_name"] for s in key_servers + ): + logger.warning(TRUSTED_KEY_SERVER_CONFIGURED_AS_M_ORG_WARN) + # list of TrustedKeyServer objects self.key_servers = list( _parse_key_servers(key_servers, self.federation_verify_certificates) @@ -190,6 +224,10 @@ class KeyConfig(Config): # This setting supercedes an older setting named `perspectives`. The old format # is still supported for backwards-compatibility, but it is deprecated. # + # 'trusted_key_servers' defaults to matrix.org, but using it will generate a + # warning on start-up. To suppress this warning, set + # 'suppress_key_server_warning' to true. + # # Options for each entry in the list include: # # server_name: the name of the server. required. 
@@ -214,11 +252,13 @@ class KeyConfig(Config): # "ed25519:auto": "abcdefghijklmnopqrstuvwxyzabcdefghijklmopqr" # - server_name: "my_other_trusted_server.example.com" # - # The default configuration is: - # - #trusted_key_servers: - # - server_name: "matrix.org" + trusted_key_servers: + - server_name: "matrix.org" + + # Uncomment the following to disable the warning that is emitted when the + # trusted_key_servers include 'matrix.org'. See above. # + #suppress_key_server_warning: true # The signing keys to use when acting as a trusted key server. If not specified # defaults to the server signing key. diff --git a/synapse/config/server.py b/synapse/config/server.py index 9d3f1b5bfc..5ad7ee911d 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -48,6 +48,13 @@ ROOM_COMPLEXITY_TOO_GREAT = ( "to join this room." ) +METRICS_PORT_WARNING = """\ +The metrics_port configuration option is deprecated in Synapse 0.31 in favour of +a listener. Please see +https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md +on how to configure the new listener. +--------------------------------------------------------------------------------""" + class ServerConfig(Config): def read_config(self, config, **kwargs): @@ -341,14 +348,7 @@ class ServerConfig(Config): metrics_port = config.get("metrics_port") if metrics_port: - logger.warn( - ( - "The metrics_port configuration option is deprecated in Synapse 0.31 " - "in favour of a listener. Please see " - "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md" - " on how to configure the new listener." - ) - ) + logger.warning(METRICS_PORT_WARNING) self.listeners.append( { -- cgit 1.4.1 From 54569c787b4abbc5674d9c23c012b56d8cc156ef Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 26 Sep 2019 15:38:25 +0100 Subject: Kill off half-implemented password-reset via sms (#6101) Doing a password reset via SMS has never worked, and in any case is a silly idea because msisdn recycling is a thing. See also matrix-org/matrix-doc#2303. --- changelog.d/6101.misc | 1 + synapse/rest/client/v2_alpha/account.py | 65 +-------------------------------- 2 files changed, 2 insertions(+), 64 deletions(-) create mode 100644 changelog.d/6101.misc (limited to 'synapse') diff --git a/changelog.d/6101.misc b/changelog.d/6101.misc new file mode 100644 index 0000000000..9743abb9e9 --- /dev/null +++ b/changelog.d/6101.misc @@ -0,0 +1 @@ +Kill off half-implemented password-reset via sms. 
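The account.py diff below deletes the msisdn `requestToken` servlet and narrows the user-interactive auth flows accepted for a password reset from `[[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]]` to `[[LoginType.EMAIL_IDENTITY]]`. For context, a flow list of this shape is satisfied once the client has completed every stage of at least one listed flow; the sketch below shows that check with a hypothetical helper and is not Synapse's `check_auth`.

    def flow_satisfied(allowed_flows, completed_stages):
        """Return the first allowed flow whose stages have all been completed.

        allowed_flows: list of flows, each a list of stage names,
            e.g. [["m.login.email.identity"]].
        completed_stages: set of stage names the client has already passed.
        Hypothetical helper, for illustration only.
        """
        for flow in allowed_flows:
            if all(stage in completed_stages for stage in flow):
                return flow
        return None


    # After this change only the email flow can satisfy a password reset:
    assert flow_satisfied([["m.login.email.identity"]], {"m.login.email.identity"})
    assert flow_satisfied([["m.login.email.identity"]], {"m.login.msisdn"}) is None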
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index f99676fd30..80cf7126a0 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -129,66 +129,6 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): return 200, ret -class MsisdnPasswordRequestTokenRestServlet(RestServlet): - PATTERNS = client_patterns("/account/password/msisdn/requestToken$") - - def __init__(self, hs): - super(MsisdnPasswordRequestTokenRestServlet, self).__init__() - self.hs = hs - self.datastore = self.hs.get_datastore() - self.identity_handler = hs.get_handlers().identity_handler - - @defer.inlineCallbacks - def on_POST(self, request): - body = parse_json_object_from_request(request) - - assert_params_in_dict( - body, ["client_secret", "country", "phone_number", "send_attempt"] - ) - client_secret = body["client_secret"] - country = body["country"] - phone_number = body["phone_number"] - send_attempt = body["send_attempt"] - next_link = body.get("next_link") # Optional param - - msisdn = phone_number_to_msisdn(country, phone_number) - - if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError( - 403, - "Account phone numbers are not authorized on this server", - Codes.THREEPID_DENIED, - ) - - existing_user_id = yield self.datastore.get_user_id_by_threepid( - "msisdn", msisdn - ) - - if existing_user_id is None: - raise SynapseError(400, "MSISDN not found", Codes.THREEPID_NOT_FOUND) - - if not self.hs.config.account_threepid_delegate_msisdn: - logger.warn( - "No upstream msisdn account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, - "Password reset by phone number is not supported on this homeserver", - ) - - ret = yield self.identity_handler.requestMsisdnToken( - self.hs.config.account_threepid_delegate_msisdn, - country, - phone_number, - client_secret, - send_attempt, - next_link, - ) - - return 200, ret - - class PasswordResetSubmitTokenServlet(RestServlet): """Handles 3PID validation token submission""" @@ -301,9 +241,7 @@ class PasswordRestServlet(RestServlet): else: requester = None result, params, _ = yield self.auth_handler.check_auth( - [[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]], - body, - self.hs.get_ip_from_request(request), + [[LoginType.EMAIL_IDENTITY]], body, self.hs.get_ip_from_request(request) ) if LoginType.EMAIL_IDENTITY in result: @@ -843,7 +781,6 @@ class WhoamiRestServlet(RestServlet): def register_servlets(hs, http_server): EmailPasswordRequestTokenRestServlet(hs).register(http_server) - MsisdnPasswordRequestTokenRestServlet(hs).register(http_server) PasswordResetSubmitTokenServlet(hs).register(http_server) PasswordRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) -- cgit 1.4.1 From 3423633d50723a818975317c33545385b53b372f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 26 Sep 2019 16:39:15 +0100 Subject: Fix 'redaction_retention_period' sampel config to match guidelines --- docs/sample_config.yaml | 2 +- synapse/config/server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 254e1b17b4..43893399ad 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -314,7 +314,7 @@ listeners: # # Defaults to `7d`. Set to `null` to disable. 
# -redaction_retention_period: 7d +#redaction_retention_period: 28d # How long to track users' last seen time and IPs in the database. # diff --git a/synapse/config/server.py b/synapse/config/server.py index 5ad7ee911d..536ee7f29c 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -742,7 +742,7 @@ class ServerConfig(Config): # # Defaults to `7d`. Set to `null` to disable. # - redaction_retention_period: 7d + #redaction_retention_period: 28d # How long to track users' last seen time and IPs in the database. # -- cgit 1.4.1 From 8c27bc8b60d4b78c059ea727a78e78dc8cd3df7a Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 27 Sep 2019 10:36:20 +0100 Subject: Move lookup-related functions from RoomMemberHandler to IdentityHandler (#5978) Just to have all the methods that make calls to identity services in one place. --- changelog.d/5978.misc | 1 + synapse/handlers/identity.py | 353 ++++++++++++++++++++++++++++++++++++++ synapse/handlers/room_member.py | 370 +--------------------------------------- 3 files changed, 360 insertions(+), 364 deletions(-) create mode 100644 changelog.d/5978.misc (limited to 'synapse') diff --git a/changelog.d/5978.misc b/changelog.d/5978.misc new file mode 100644 index 0000000000..6d2b69b11b --- /dev/null +++ b/changelog.d/5978.misc @@ -0,0 +1 @@ +Move lookup-related functions from RoomMemberHandler to IdentityHandler. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 6d42a1aed8..ba99ddf76d 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -21,11 +21,15 @@ import logging import urllib from canonicaljson import json +from signedjson.key import decode_verify_key_bytes +from signedjson.sign import verify_signed_json +from unpaddedbase64 import decode_base64 from twisted.internet import defer from twisted.internet.error import TimeoutError from synapse.api.errors import ( + AuthError, CodeMessageException, Codes, HttpResponseException, @@ -33,12 +37,15 @@ from synapse.api.errors import ( ) from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.client import SimpleHttpClient +from synapse.util.hash import sha256_and_url_safe_base64 from synapse.util.stringutils import random_string from ._base import BaseHandler logger = logging.getLogger(__name__) +id_server_scheme = "https://" + class IdentityHandler(BaseHandler): def __init__(self, hs): @@ -557,6 +564,352 @@ class IdentityHandler(BaseHandler): logger.warning("Error contacting msisdn account_threepid_delegate: %s", e) raise SynapseError(400, "Error contacting the identity server") + @defer.inlineCallbacks + def lookup_3pid(self, id_server, medium, address, id_access_token=None): + """Looks up a 3pid in the passed identity server. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + str|None: the matrix ID of the 3pid, or None if it is not recognized. 
+ """ + if id_access_token is not None: + try: + results = yield self._lookup_3pid_v2( + id_server, id_access_token, medium, address + ) + return results + + except Exception as e: + # Catch HttpResponseExcept for a non-200 response code + # Check if this identity server does not know about v2 lookups + if isinstance(e, HttpResponseException) and e.code == 404: + # This is an old identity server that does not yet support v2 lookups + logger.warning( + "Attempted v2 lookup on v1 identity server %s. Falling " + "back to v1", + id_server, + ) + else: + logger.warning("Error when looking up hashing details: %s", e) + return None + + return (yield self._lookup_3pid_v1(id_server, medium, address)) + + @defer.inlineCallbacks + def _lookup_3pid_v1(self, id_server, medium, address): + """Looks up a 3pid in the passed identity server using v1 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + str: the matrix ID of the 3pid, or None if it is not recognized. + """ + try: + data = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), + {"medium": medium, "address": address}, + ) + + if "mxid" in data: + if "signatures" not in data: + raise AuthError(401, "No signatures on 3pid binding") + yield self._verify_any_signature(data, id_server) + return data["mxid"] + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except IOError as e: + logger.warning("Error from v1 identity server lookup: %s" % (e,)) + + return None + + @defer.inlineCallbacks + def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): + """Looks up a 3pid in the passed identity server using v2 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + id_access_token (str): The access token to authenticate to the identity server with + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. 
+ """ + # Check what hashing details are supported by this identity server + try: + hash_details = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), + {"access_token": id_access_token}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + + if not isinstance(hash_details, dict): + logger.warning( + "Got non-dict object when checking hash details of %s%s: %s", + id_server_scheme, + id_server, + hash_details, + ) + raise SynapseError( + 400, + "Non-dict object from %s%s during v2 hash_details request: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Extract information from hash_details + supported_lookup_algorithms = hash_details.get("algorithms") + lookup_pepper = hash_details.get("lookup_pepper") + if ( + not supported_lookup_algorithms + or not isinstance(supported_lookup_algorithms, list) + or not lookup_pepper + or not isinstance(lookup_pepper, str) + ): + raise SynapseError( + 400, + "Invalid hash details received from identity server %s%s: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Check if any of the supported lookup algorithms are present + if LookupAlgorithm.SHA256 in supported_lookup_algorithms: + # Perform a hashed lookup + lookup_algorithm = LookupAlgorithm.SHA256 + + # Hash address, medium and the pepper with sha256 + to_hash = "%s %s %s" % (address, medium, lookup_pepper) + lookup_value = sha256_and_url_safe_base64(to_hash) + + elif LookupAlgorithm.NONE in supported_lookup_algorithms: + # Perform a non-hashed lookup + lookup_algorithm = LookupAlgorithm.NONE + + # Combine together plaintext address and medium + lookup_value = "%s %s" % (address, medium) + + else: + logger.warning( + "None of the provided lookup algorithms of %s are supported: %s", + id_server, + supported_lookup_algorithms, + ) + raise SynapseError( + 400, + "Provided identity server does not support any v2 lookup " + "algorithms that this homeserver supports.", + ) + + # Authenticate with identity server given the access token from the client + headers = {"Authorization": create_id_access_token_header(id_access_token)} + + try: + lookup_results = yield self.blacklisting_http_client.post_json_get_json( + "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), + { + "addresses": [lookup_value], + "algorithm": lookup_algorithm, + "pepper": lookup_pepper, + }, + headers=headers, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except Exception as e: + logger.warning("Error when performing a v2 3pid lookup: %s", e) + raise SynapseError( + 500, "Unknown error occurred during identity server lookup" + ) + + # Check for a mapping from what we looked up to an MXID + if "mappings" not in lookup_results or not isinstance( + lookup_results["mappings"], dict + ): + logger.warning("No results from 3pid lookup") + return None + + # Return the MXID if it's available, or None otherwise + mxid = lookup_results["mappings"].get(lookup_value) + return mxid + + @defer.inlineCallbacks + def _verify_any_signature(self, data, server_hostname): + if server_hostname not in data["signatures"]: + raise AuthError(401, "No signature from server %s" % (server_hostname,)) + for key_name, signature in data["signatures"][server_hostname].items(): + try: + key_data = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/api/v1/pubkey/%s" + % (id_server_scheme, server_hostname, key_name) + ) + except TimeoutError: + raise 
SynapseError(500, "Timed out contacting identity server") + if "public_key" not in key_data: + raise AuthError( + 401, "No public key named %s from %s" % (key_name, server_hostname) + ) + verify_signed_json( + data, + server_hostname, + decode_verify_key_bytes( + key_name, decode_base64(key_data["public_key"]) + ), + ) + return + + @defer.inlineCallbacks + def ask_id_server_for_third_party_invite( + self, + requester, + id_server, + medium, + address, + room_id, + inviter_user_id, + room_alias, + room_avatar_url, + room_join_rules, + room_name, + inviter_display_name, + inviter_avatar_url, + id_access_token=None, + ): + """ + Asks an identity server for a third party invite. + + Args: + requester (Requester) + id_server (str): hostname + optional port for the identity server. + medium (str): The literal string "email". + address (str): The third party address being invited. + room_id (str): The ID of the room to which the user is invited. + inviter_user_id (str): The user ID of the inviter. + room_alias (str): An alias for the room, for cosmetic notifications. + room_avatar_url (str): The URL of the room's avatar, for cosmetic + notifications. + room_join_rules (str): The join rules of the email (e.g. "public"). + room_name (str): The m.room.name of the room. + inviter_display_name (str): The current display name of the + inviter. + inviter_avatar_url (str): The URL of the inviter's avatar. + id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + A deferred tuple containing: + token (str): The token which must be signed to prove authenticity. + public_keys ([{"public_key": str, "key_validity_url": str}]): + public_key is a base64-encoded ed25519 public key. + fallback_public_key: One element from public_keys. + display_name (str): A user-friendly name to represent the invited + user. 
+ """ + invite_config = { + "medium": medium, + "address": address, + "room_id": room_id, + "room_alias": room_alias, + "room_avatar_url": room_avatar_url, + "room_join_rules": room_join_rules, + "room_name": room_name, + "sender": inviter_user_id, + "sender_display_name": inviter_display_name, + "sender_avatar_url": inviter_avatar_url, + } + + # Add the identity service access token to the JSON body and use the v2 + # Identity Service endpoints if id_access_token is present + data = None + base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) + + if id_access_token: + key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) + + # Attempt a v2 lookup + url = base_url + "/v2/store-invite" + try: + data = yield self.blacklisting_http_client.post_json_get_json( + url, + invite_config, + {"Authorization": create_id_access_token_header(id_access_token)}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + if e.code != 404: + logger.info("Failed to POST %s with JSON: %s", url, e) + raise e + + if data is None: + key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) + url = base_url + "/api/v1/store-invite" + + try: + data = yield self.blacklisting_http_client.post_json_get_json( + url, invite_config + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + logger.warning( + "Error trying to call /store-invite on %s%s: %s", + id_server_scheme, + id_server, + e, + ) + + if data is None: + # Some identity servers may only support application/x-www-form-urlencoded + # types. This is especially true with old instances of Sydent, see + # https://github.com/matrix-org/sydent/pull/170 + try: + data = yield self.blacklisting_http_client.post_urlencoded_get_json( + url, invite_config + ) + except HttpResponseException as e: + logger.warning( + "Error calling /store-invite on %s%s with fallback " + "encoding: %s", + id_server_scheme, + id_server, + e, + ) + raise e + + # TODO: Check for success + token = data["token"] + public_keys = data.get("public_keys", []) + if "public_key" in data: + fallback_public_key = { + "public_key": data["public_key"], + "key_validity_url": key_validity_url, + } + else: + fallback_public_key = public_keys[0] + + if not public_keys: + public_keys.append(fallback_public_key) + display_name = data["display_name"] + return token, public_keys, fallback_public_key, display_name + def create_id_access_token_header(id_access_token): """Create an Authorization header for passing to SimpleHttpClient as the header value diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 94cd0cf3ef..8abdb1b6e6 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -20,29 +20,19 @@ import logging from six.moves import http_client -from signedjson.key import decode_verify_key_bytes -from signedjson.sign import verify_signed_json -from unpaddedbase64 import decode_base64 - from twisted.internet import defer -from twisted.internet.error import TimeoutError from synapse import types from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError -from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header -from synapse.http.client import SimpleHttpClient +from synapse.api.errors import 
AuthError, Codes, SynapseError from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room -from synapse.util.hash import sha256_and_url_safe_base64 from ._base import BaseHandler logger = logging.getLogger(__name__) -id_server_scheme = "https://" - class RoomMemberHandler(object): # TODO(paul): This handler currently contains a messy conflation of @@ -63,14 +53,10 @@ class RoomMemberHandler(object): self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - # We create a blacklisting instance of SimpleHttpClient for contacting identity - # servers specified by clients - self.simple_http_client = SimpleHttpClient( - hs, ip_blacklist=hs.config.federation_ip_range_blacklist - ) self.federation_handler = hs.get_handlers().federation_handler self.directory_handler = hs.get_handlers().directory_handler + self.identity_handler = hs.get_handlers().identity_handler self.registration_handler = hs.get_registration_handler() self.profile_handler = hs.get_profile_handler() self.event_creation_handler = hs.get_event_creation_handler() @@ -682,7 +668,9 @@ class RoomMemberHandler(object): 403, "Looking up third-party identifiers is denied from this server" ) - invitee = yield self._lookup_3pid(id_server, medium, address, id_access_token) + invitee = yield self.identity_handler.lookup_3pid( + id_server, medium, address, id_access_token + ) if invitee: yield self.update_membership( @@ -700,211 +688,6 @@ class RoomMemberHandler(object): id_access_token=id_access_token, ) - @defer.inlineCallbacks - def _lookup_3pid(self, id_server, medium, address, id_access_token=None): - """Looks up a 3pid in the passed identity server. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - id_access_token (str|None): The access token to authenticate to the identity - server with - - Returns: - str|None: the matrix ID of the 3pid, or None if it is not recognized. - """ - if id_access_token is not None: - try: - results = yield self._lookup_3pid_v2( - id_server, id_access_token, medium, address - ) - return results - - except Exception as e: - # Catch HttpResponseExcept for a non-200 response code - # Check if this identity server does not know about v2 lookups - if isinstance(e, HttpResponseException) and e.code == 404: - # This is an old identity server that does not yet support v2 lookups - logger.warning( - "Attempted v2 lookup on v1 identity server %s. Falling " - "back to v1", - id_server, - ) - else: - logger.warning("Error when looking up hashing details: %s", e) - return None - - return (yield self._lookup_3pid_v1(id_server, medium, address)) - - @defer.inlineCallbacks - def _lookup_3pid_v1(self, id_server, medium, address): - """Looks up a 3pid in the passed identity server using v1 lookup. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - - Returns: - str: the matrix ID of the 3pid, or None if it is not recognized. 
- """ - try: - data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), - {"medium": medium, "address": address}, - ) - - if "mxid" in data: - if "signatures" not in data: - raise AuthError(401, "No signatures on 3pid binding") - yield self._verify_any_signature(data, id_server) - return data["mxid"] - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except IOError as e: - logger.warning("Error from v1 identity server lookup: %s" % (e,)) - - return None - - @defer.inlineCallbacks - def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): - """Looks up a 3pid in the passed identity server using v2 lookup. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - id_access_token (str): The access token to authenticate to the identity server with - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - - Returns: - Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. - """ - # Check what hashing details are supported by this identity server - try: - hash_details = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), - {"access_token": id_access_token}, - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - - if not isinstance(hash_details, dict): - logger.warning( - "Got non-dict object when checking hash details of %s%s: %s", - id_server_scheme, - id_server, - hash_details, - ) - raise SynapseError( - 400, - "Non-dict object from %s%s during v2 hash_details request: %s" - % (id_server_scheme, id_server, hash_details), - ) - - # Extract information from hash_details - supported_lookup_algorithms = hash_details.get("algorithms") - lookup_pepper = hash_details.get("lookup_pepper") - if ( - not supported_lookup_algorithms - or not isinstance(supported_lookup_algorithms, list) - or not lookup_pepper - or not isinstance(lookup_pepper, str) - ): - raise SynapseError( - 400, - "Invalid hash details received from identity server %s%s: %s" - % (id_server_scheme, id_server, hash_details), - ) - - # Check if any of the supported lookup algorithms are present - if LookupAlgorithm.SHA256 in supported_lookup_algorithms: - # Perform a hashed lookup - lookup_algorithm = LookupAlgorithm.SHA256 - - # Hash address, medium and the pepper with sha256 - to_hash = "%s %s %s" % (address, medium, lookup_pepper) - lookup_value = sha256_and_url_safe_base64(to_hash) - - elif LookupAlgorithm.NONE in supported_lookup_algorithms: - # Perform a non-hashed lookup - lookup_algorithm = LookupAlgorithm.NONE - - # Combine together plaintext address and medium - lookup_value = "%s %s" % (address, medium) - - else: - logger.warning( - "None of the provided lookup algorithms of %s are supported: %s", - id_server, - supported_lookup_algorithms, - ) - raise SynapseError( - 400, - "Provided identity server does not support any v2 lookup " - "algorithms that this homeserver supports.", - ) - - # Authenticate with identity server given the access token from the client - headers = {"Authorization": create_id_access_token_header(id_access_token)} - - try: - lookup_results = yield self.simple_http_client.post_json_get_json( - "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), - { - "addresses": [lookup_value], - "algorithm": lookup_algorithm, - 
"pepper": lookup_pepper, - }, - headers=headers, - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except Exception as e: - logger.warning("Error when performing a v2 3pid lookup: %s", e) - raise SynapseError( - 500, "Unknown error occurred during identity server lookup" - ) - - # Check for a mapping from what we looked up to an MXID - if "mappings" not in lookup_results or not isinstance( - lookup_results["mappings"], dict - ): - logger.warning("No results from 3pid lookup") - return None - - # Return the MXID if it's available, or None otherwise - mxid = lookup_results["mappings"].get(lookup_value) - return mxid - - @defer.inlineCallbacks - def _verify_any_signature(self, data, server_hostname): - if server_hostname not in data["signatures"]: - raise AuthError(401, "No signature from server %s" % (server_hostname,)) - for key_name, signature in data["signatures"][server_hostname].items(): - try: - key_data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/pubkey/%s" - % (id_server_scheme, server_hostname, key_name) - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - if "public_key" not in key_data: - raise AuthError( - 401, "No public key named %s from %s" % (key_name, server_hostname) - ) - verify_signed_json( - data, - server_hostname, - decode_verify_key_bytes( - key_name, decode_base64(key_data["public_key"]) - ), - ) - return - @defer.inlineCallbacks def _make_and_store_3pid_invite( self, @@ -951,7 +734,7 @@ class RoomMemberHandler(object): room_avatar_url = room_avatar_event.content.get("url", "") token, public_keys, fallback_public_key, display_name = ( - yield self._ask_id_server_for_third_party_invite( + yield self.identity_handler.ask_id_server_for_third_party_invite( requester=requester, id_server=id_server, medium=medium, @@ -987,147 +770,6 @@ class RoomMemberHandler(object): txn_id=txn_id, ) - @defer.inlineCallbacks - def _ask_id_server_for_third_party_invite( - self, - requester, - id_server, - medium, - address, - room_id, - inviter_user_id, - room_alias, - room_avatar_url, - room_join_rules, - room_name, - inviter_display_name, - inviter_avatar_url, - id_access_token=None, - ): - """ - Asks an identity server for a third party invite. - - Args: - requester (Requester) - id_server (str): hostname + optional port for the identity server. - medium (str): The literal string "email". - address (str): The third party address being invited. - room_id (str): The ID of the room to which the user is invited. - inviter_user_id (str): The user ID of the inviter. - room_alias (str): An alias for the room, for cosmetic notifications. - room_avatar_url (str): The URL of the room's avatar, for cosmetic - notifications. - room_join_rules (str): The join rules of the email (e.g. "public"). - room_name (str): The m.room.name of the room. - inviter_display_name (str): The current display name of the - inviter. - inviter_avatar_url (str): The URL of the inviter's avatar. - id_access_token (str|None): The access token to authenticate to the identity - server with - - Returns: - A deferred tuple containing: - token (str): The token which must be signed to prove authenticity. - public_keys ([{"public_key": str, "key_validity_url": str}]): - public_key is a base64-encoded ed25519 public key. - fallback_public_key: One element from public_keys. - display_name (str): A user-friendly name to represent the invited - user. 
- """ - invite_config = { - "medium": medium, - "address": address, - "room_id": room_id, - "room_alias": room_alias, - "room_avatar_url": room_avatar_url, - "room_join_rules": room_join_rules, - "room_name": room_name, - "sender": inviter_user_id, - "sender_display_name": inviter_display_name, - "sender_avatar_url": inviter_avatar_url, - } - - # Add the identity service access token to the JSON body and use the v2 - # Identity Service endpoints if id_access_token is present - data = None - base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) - - if id_access_token: - key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( - id_server_scheme, - id_server, - ) - - # Attempt a v2 lookup - url = base_url + "/v2/store-invite" - try: - data = yield self.simple_http_client.post_json_get_json( - url, - invite_config, - {"Authorization": create_id_access_token_header(id_access_token)}, - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except HttpResponseException as e: - if e.code != 404: - logger.info("Failed to POST %s with JSON: %s", url, e) - raise e - - if data is None: - key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( - id_server_scheme, - id_server, - ) - url = base_url + "/api/v1/store-invite" - - try: - data = yield self.simple_http_client.post_json_get_json( - url, invite_config - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except HttpResponseException as e: - logger.warning( - "Error trying to call /store-invite on %s%s: %s", - id_server_scheme, - id_server, - e, - ) - - if data is None: - # Some identity servers may only support application/x-www-form-urlencoded - # types. This is especially true with old instances of Sydent, see - # https://github.com/matrix-org/sydent/pull/170 - try: - data = yield self.simple_http_client.post_urlencoded_get_json( - url, invite_config - ) - except HttpResponseException as e: - logger.warning( - "Error calling /store-invite on %s%s with fallback " - "encoding: %s", - id_server_scheme, - id_server, - e, - ) - raise e - - # TODO: Check for success - token = data["token"] - public_keys = data.get("public_keys", []) - if "public_key" in data: - fallback_public_key = { - "public_key": data["public_key"], - "key_validity_url": key_validity_url, - } - else: - fallback_public_key = public_keys[0] - - if not public_keys: - public_keys.append(fallback_public_key) - display_name = data["display_name"] - return token, public_keys, fallback_public_key, display_name - @defer.inlineCallbacks def _is_host_in_room(self, current_state_ids): # Have we just created the room, and is this about to be the very -- cgit 1.4.1 From 5257a2fb1c983158bbdee8be4e61066f1a83d4a8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 14:49:53 +0100 Subject: Reject pending invites on deactivation --- synapse/handlers/deactivate_account.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'synapse') diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index d83912c9a4..9815365f54 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -120,6 +120,10 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() + # Reject all pending invites for the user, so that it doesn't show up in the + # invitees list of rooms. 
+ self._reject_pending_invites_for_user(user_id) + # Remove all information on the user from the account_validity table. if self._account_validity_enabled: yield self.store.delete_account_validity_for_user(user_id) @@ -129,6 +133,33 @@ class DeactivateAccountHandler(BaseHandler): return identity_server_supports_unbinding + def _reject_pending_invites_for_user(self, user_id): + """Reject pending invites addressed to a given user ID. + + Args: + user_id (str): The user ID to reject pending invites for. + """ + user = UserID.from_string(user_id) + pending_invites = yield self.store.get_invited_rooms_for_user(user_id) + + for room in pending_invites: + try: + yield self._room_member_handler.update_membership( + create_requester(user), + user, + room.room_id, + "leave", + ratelimit=False, + require_consent=False, + ) + except Exception: + logger.exception( + "Failed to reject invite for user %r in room %r:" + " ignoring and continuing", + user_id, + room.room_id, + ) + def _start_user_parting(self): """ Start the process that goes through the table of users -- cgit 1.4.1 From 132279a46fd76de8f767bc6977192900c450fec9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 27 Sep 2019 15:11:14 +0100 Subject: Patch inlinecallbacks for log contexts --- synapse/__init__.py | 6 +++ synapse/handlers/room_member.py | 4 +- synapse/storage/_base.py | 7 ++-- synapse/storage/push_rule.py | 2 +- tests/patch_inline_callbacks.py | 86 +++++++++++++++++++++++++++++++++++++++-- 5 files changed, 95 insertions(+), 10 deletions(-) (limited to 'synapse') diff --git a/synapse/__init__.py b/synapse/__init__.py index ddfe9ec542..4401fd52f0 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,8 +17,11 @@ """ This is a reference implementation of a Matrix home server. """ +import os import sys +from tests.patch_inline_callbacks import do_patch + # Check that we're not running on an unsupported Python version. if sys.version_info < (3, 5): print("Synapse requires Python 3.5 or above.") @@ -36,3 +39,6 @@ except ImportError: pass __version__ = "1.4.0rc1" + +if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): + do_patch() diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 8abdb1b6e6..19e44b5460 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -213,11 +213,11 @@ class RoomMemberHandler(object): if predecessor: # It is an upgraded room. 
Copy over old tags - self.copy_room_tags_and_direct_to_room( + yield self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id ) # Move over old push rules - self.store.move_push_rules_from_room_to_room_for_user( + yield self.store.move_push_rules_from_room_to_room_for_user( predecessor["room_id"], room_id, user_id ) elif event.membership == Membership.LEAVE: diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abe16334ec..06cc14fcd1 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -30,7 +30,7 @@ from prometheus_client import Histogram from twisted.internet import defer from synapse.api.errors import StoreError -from synapse.logging.context import LoggingContext, PreserveLoggingContext +from synapse.logging.context import LoggingContext, make_deferred_yieldable from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import get_domain_from_id @@ -550,8 +550,9 @@ class SQLBaseStore(object): return func(conn, *args, **kwargs) - with PreserveLoggingContext(): - result = yield self._db_pool.runWithConnection(inner_func, *args, **kwargs) + result = yield make_deferred_yieldable( + self._db_pool.runWithConnection(inner_func, *args, **kwargs) + ) return result diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index a6517c4cf3..1f878e6575 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -235,7 +235,7 @@ class PushRulesWorkerStore( (c.get("key") == "room_id" and c.get("pattern") == old_room_id) for c in conditions ): - self.move_push_rule_from_room_to_room(new_room_id, user_id, rule) + yield self.move_push_rule_from_room_to_room(new_room_id, user_id, rule) @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): diff --git a/tests/patch_inline_callbacks.py b/tests/patch_inline_callbacks.py index 220884311c..5ef0aff0c3 100644 --- a/tests/patch_inline_callbacks.py +++ b/tests/patch_inline_callbacks.py @@ -15,6 +15,7 @@ from __future__ import print_function +import inspect import functools import sys @@ -33,17 +34,19 @@ def do_patch(): orig_inline_callbacks = defer.inlineCallbacks def new_inline_callbacks(f): - - orig = orig_inline_callbacks(f) - @functools.wraps(f) def wrapped(*args, **kwargs): start_context = LoggingContext.current_context() + changes = [] + orig = orig_inline_callbacks(_check_yield_points(f, changes, start_context)) try: res = orig(*args, **kwargs) except Exception: if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + err = "%s changed context from %s to %s on exception" % ( f, start_context, @@ -55,7 +58,10 @@ def do_patch(): if not isinstance(res, Deferred) or res.called: if LoggingContext.current_context() != start_context: - err = "%s changed context from %s to %s" % ( + for err in changes: + print(err, file=sys.stderr) + + err = "Completed %s changed context from %s to %s" % ( f, start_context, LoggingContext.current_context(), @@ -76,6 +82,8 @@ def do_patch(): def check_ctx(r): if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) err = "%s completion of %s changed context from %s to %s" % ( "Failure" if isinstance(r, Failure) else "Success", f, @@ -92,3 +100,73 @@ def do_patch(): return wrapped defer.inlineCallbacks = new_inline_callbacks + + +def _check_yield_points(f, changes, start_context): + from synapse.logging.context 
import LoggingContext + + @functools.wraps(f) + def check_yield_points_inner(*args, **kwargs): + gen = f(*args, **kwargs) + + last_yield_line_no = 1 + result = None + while True: + try: + isFailure = isinstance(result, Failure) + if isFailure: + d = result.throwExceptionIntoGenerator(gen) + else: + d = gen.send(result) + except (StopIteration, defer._DefGen_Return) as e: + if LoggingContext.current_context() != start_context: + # This happens when the context is lost sometime *after* the + # final yield and returning. E.g. we forgot to yield on a + # function that returns a deferred. + err = ( + "%s returned and changed context from %s to %s, in %s between %d and end of func" + % ( + f.__qualname__, + start_context, + LoggingContext.current_context(), + f.__code__.co_filename, + last_yield_line_no, + ) + ) + changes.append(err) + # print(err, file=sys.stderr) + # raise Exception(err) + return getattr(e, "value", None) + + try: + result = yield d + except Exception as e: + result = Failure(e) + + frame = gen.gi_frame + if frame.f_code.co_name == "check_yield_points_inner": + frame = inspect.getgeneratorlocals(gen)["gen"].gi_frame + + if LoggingContext.current_context() != start_context: + # This happens because the context is lost sometime *after* the + # previous yield and *after* the current yield. E.g. the + # deferred we waited on didn't follow the rules, or we forgot to + # yield on a function between the two yield points. + err = ( + "%s changed context from %s to %s, happened between lines %d and %d in %s" + % ( + frame.f_code.co_name, + start_context, + LoggingContext.current_context(), + last_yield_line_no, + frame.f_lineno, + frame.f_code.co_filename, + ) + ) + changes.append(err) + # print(err, file=sys.stderr) + # raise Exception(err) + + last_yield_line_no = frame.f_lineno + + return check_yield_points_inner -- cgit 1.4.1 From 72a2708ac6335985eb5171f5685f73d2ea120a2e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:13:39 +0100 Subject: Fixup and add some logging --- synapse/handlers/deactivate_account.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 9815365f54..763fea3a24 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -122,7 +122,7 @@ class DeactivateAccountHandler(BaseHandler): # Reject all pending invites for the user, so that it doesn't show up in the # invitees list of rooms. - self._reject_pending_invites_for_user(user_id) + yield self._reject_pending_invites_for_user(user_id) # Remove all information on the user from the account_validity table. if self._account_validity_enabled: @@ -133,6 +133,7 @@ class DeactivateAccountHandler(BaseHandler): return identity_server_supports_unbinding + @defer.inlineCallbacks def _reject_pending_invites_for_user(self, user_id): """Reject pending invites addressed to a given user ID. 
@@ -142,6 +143,8 @@ class DeactivateAccountHandler(BaseHandler): user = UserID.from_string(user_id) pending_invites = yield self.store.get_invited_rooms_for_user(user_id) + logger.info(pending_invites) + for room in pending_invites: try: yield self._room_member_handler.update_membership( @@ -152,6 +155,11 @@ class DeactivateAccountHandler(BaseHandler): ratelimit=False, require_consent=False, ) + logger.info( + "Rejected invite for user %r in room %r", + user_id, + room.room_id, + ) except Exception: logger.exception( "Failed to reject invite for user %r in room %r:" -- cgit 1.4.1 From 873fe7883cf0d7cf5346a9a55d40967a35848e33 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:21:03 +0100 Subject: Lint --- synapse/handlers/deactivate_account.py | 4 +--- tests/rest/client/v2_alpha/test_account.py | 8 +------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 763fea3a24..148d1424ca 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -156,9 +156,7 @@ class DeactivateAccountHandler(BaseHandler): require_consent=False, ) logger.info( - "Rejected invite for user %r in room %r", - user_id, - room.room_id, + "Rejected invite for user %r in room %r", user_id, room.room_id ) except Exception: logger.exception( diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 69c33dfd8a..434b730faf 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -280,12 +280,7 @@ class DeactivateTestCase(unittest.HomeserverTestCase): # Make @inviter:test invite @invitee:test in a new room. room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok) - self.helper.invite( - room=room_id, - src=inviter_id, - targ=invitee_id, - tok=inviter_tok, - ) + self.helper.invite(room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok) # Make sure the invite is here. pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) @@ -322,4 +317,3 @@ class DeactivateTestCase(unittest.HomeserverTestCase): ) self.render(request) self.assertEqual(request.code, 200) - -- cgit 1.4.1 From bbe2a0f33916d7b01179c56b230307c46843625a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:10:36 +0100 Subject: Update synapse/handlers/deactivate_account.py Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/deactivate_account.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 148d1424ca..5cf01479db 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -120,7 +120,7 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() - # Reject all pending invites for the user, so that it doesn't show up in the + # Reject all pending invites for the user, so that they do not show up in the # invitees list of rooms. 
yield self._reject_pending_invites_for_user(user_id) -- cgit 1.4.1 From af92110c465ea7cf4d04e1193b58f16ae26a75d6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:12:15 +0100 Subject: Update synapse/handlers/deactivate_account.py Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/deactivate_account.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 5cf01479db..5f142f82c2 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -156,7 +156,9 @@ class DeactivateAccountHandler(BaseHandler): require_consent=False, ) logger.info( - "Rejected invite for user %r in room %r", user_id, room.room_id + "Rejected invite for deactivated user %r in room %r", + user_id, + room.room_id, ) except Exception: logger.exception( -- cgit 1.4.1 From 3e42d47a5a06ea5d353b75a42040107bf401d8ba Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:15:01 +0100 Subject: Incorporate review --- synapse/handlers/deactivate_account.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 5f142f82c2..63267a0a4c 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -120,8 +120,8 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() - # Reject all pending invites for the user, so that they do not show up in the - # invitees list of rooms. + # Reject all pending invites for the user, so that the user doesn't show up in the + # "invited" section of rooms' members list. yield self._reject_pending_invites_for_user(user_id) # Remove all information on the user from the account_validity table. @@ -143,8 +143,6 @@ class DeactivateAccountHandler(BaseHandler): user = UserID.from_string(user_id) pending_invites = yield self.store.get_invited_rooms_for_user(user_id) - logger.info(pending_invites) - for room in pending_invites: try: yield self._room_member_handler.update_membership( -- cgit 1.4.1 From f3451118a6dca1499daadf224c3eab801dad0c0c Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 27 Sep 2019 17:59:18 +0100 Subject: Edit SimpleHttpClient to reference that header keys can be passed as str or bytes (#6077) --- changelog.d/6077.misc | 1 + synapse/http/client.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/6077.misc (limited to 'synapse') diff --git a/changelog.d/6077.misc b/changelog.d/6077.misc new file mode 100644 index 0000000000..31ac5b97a4 --- /dev/null +++ b/changelog.d/6077.misc @@ -0,0 +1 @@ +Edit header dicts docstrings in SimpleHttpClient to note that `str` or `bytes` can be passed as header keys. 
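
As an illustration of the docstring change above (this sketch is not part of the patch): header
names and values may be passed to SimpleHttpClient as either str or bytes. The client instance,
URL and token below are assumptions for the example only, and the call is assumed to run inside
a @defer.inlineCallbacks method.

    # Both bytes and str forms are accepted for header names and values.
    headers = {
        b"User-Agent": [b"Synapse"],             # bytes key and values
        "Authorization": ["Bearer <token>"],     # str key and values
    }
    # `client` is assumed to be an existing SimpleHttpClient instance.
    body = yield client.get_json("https://example.com/info", args={}, headers=headers)
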
diff --git a/synapse/http/client.py b/synapse/http/client.py index 51765ae3c0..cdf828a4ff 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -327,7 +327,7 @@ class SimpleHttpClient(object): Args: uri (str): args (dict[str, str|List[str]]): query params - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: @@ -371,7 +371,7 @@ class SimpleHttpClient(object): Args: uri (str): post_json (object): - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: @@ -414,7 +414,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -438,7 +438,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -482,7 +482,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -516,7 +516,7 @@ class SimpleHttpClient(object): Args: url (str): The URL to GET output_stream (file): File to write the response body to. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: A (int,dict,string,int) tuple of the file length, dict of the response -- cgit 1.4.1 From 16cb9a71b8b46604d49944f0b9c316687becca93 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 30 Sep 2019 09:38:41 +0100 Subject: Drop unused tables (#6115) These tables are unused since #5893 (as amended by #6047), so we can now drop them. Fixes #6048. --- changelog.d/6115.misc | 1 + .../schema/delta/56/drop_unused_event_tables.sql | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 changelog.d/6115.misc create mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql (limited to 'synapse') diff --git a/changelog.d/6115.misc b/changelog.d/6115.misc new file mode 100644 index 0000000000..b19e395a99 --- /dev/null +++ b/changelog.d/6115.misc @@ -0,0 +1 @@ +Drop some unused database tables. diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql new file mode 100644 index 0000000000..9f09922c67 --- /dev/null +++ b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- these tables are never used. +DROP TABLE IF EXISTS room_names; +DROP TABLE IF EXISTS topics; +DROP TABLE IF EXISTS history_visibility; +DROP TABLE IF EXISTS guest_access; -- cgit 1.4.1 From 9267741a5f7732d7d16f8445edc68bc68b730601 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 30 Sep 2019 11:58:36 +0100 Subject: Fix `devices_last_seen` background update. Fixes #6134. --- synapse/storage/client_ips.py | 46 +++++++++++++++++++++++++++++++------ synapse/storage/engines/postgres.py | 7 ++++++ synapse/storage/engines/sqlite.py | 8 +++++++ 3 files changed, 54 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 539584288d..bb135166ce 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -463,14 +463,46 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): last_device_id = progress.get("last_device_id", "") def _devices_last_seen_update_txn(txn): + # This consists of two queries: + # + # 1. The sub-query searches for the next N devices and joins + # against user_ips to find the max last_seen associated with + # that device. + # 2. The outer query then joins again against user_ips on + # user/device/last_seen. This *should* hopefully only + # return one row, but if it does return more than one then + # we'll just end up updating the same device row multiple + # times, which is fine. + + if self.database_engine.supports_tuple_comparison: + where_clause = "(user_id, device_id) > (?, ?)" + where_args = [last_user_id, last_device_id] + else: + # We explicitly do a `user_id >= ? AND (...)` here to ensure + # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` + # makes it hard for query optimiser to tell that it can use the + # index on user_id + where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)" + where_args = [last_user_id, last_user_id, last_device_id] + sql = """ - SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices - INNER JOIN user_ips AS u USING (user_id, device_id) - WHERE user_id > ? OR (user_id = ? AND device_id > ?) - ORDER BY user_id ASC, device_id ASC - LIMIT ? - """ - txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size)) + SELECT + last_seen, ip, user_agent, user_id, device_id + FROM ( + SELECT + user_id, device_id, MAX(u.last_seen) AS last_seen + FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE %(where_clause)s + GROUP BY user_id, device_id + ORDER BY user_id ASC, device_id ASC + LIMIT ? 
+ ) c + INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) + """ % { + "where_clause": where_clause + } + txn.execute(sql, where_args + [batch_size]) rows = txn.fetchall() if not rows: diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 289b6bc281..601617b21e 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -72,6 +72,13 @@ class PostgresEngine(object): """ return True + @property + def supports_tuple_comparison(self): + """ + Do we support comparing tuples, i.e. `(a, b) > (c, d)`? + """ + return True + def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index e9b9caa49a..ac92109366 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -38,6 +38,14 @@ class Sqlite3Engine(object): """ return self.module.sqlite_version_info >= (3, 24, 0) + @property + def supports_tuple_comparison(self): + """ + Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires + SQLite 3.15+. + """ + return self.module.sqlite_version_info >= (3, 15, 0) + def check_database(self, txn): pass -- cgit 1.4.1 From a27fb7d5cac3cacc55a1c778f02d074d4115eea6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Oct 2019 11:05:48 +0100 Subject: Don't repeatedly attempt to censor events we don't have. Currently we don't set `have_censored` column if we don't have the target event of a redaction, which means we repeatedly attempt to censor the same non-existant event. When we persist non-redacted events we unset the `have_censored` column for any redactions that target said event. --- synapse/storage/events.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ddf7ab6479..5d56ceeab3 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1389,6 +1389,22 @@ class EventsStore( ], ) + for event, _ in events_and_contexts: + if not event.internal_metadata.is_redacted(): + # If we're persisting an unredacted event we go and ensure + # that we mark any redactions that reference this event as + # requiring censoring. + self._simple_update_txn( + txn, + table="redactions", + keyvalues={ + "redacts": event.event_id, + }, + updatevalues={ + "have_censored": False, + } + ) + def _store_rejected_events_txn(self, txn, events_and_contexts): """Add rows to the 'rejections' table for received events which were rejected @@ -1589,7 +1605,7 @@ class EventsStore( sql = """ SELECT redact_event.event_id, redacts FROM redactions INNER JOIN events AS redact_event USING (event_id) - INNER JOIN events AS original_event ON ( + LEFT JOIN events AS original_event ON ( redact_event.room_id = original_event.room_id AND redacts = original_event.event_id ) -- cgit 1.4.1 From 898dde981b41a4dfb79b5830f17e6eb9871ef762 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Oct 2019 13:23:34 +0100 Subject: Add received_ts column to redactions. This will allow us to efficiently search for uncensored redactions in the DB before a given time. 
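
As an illustration of why received_ts helps (a sketch, not the patch code itself): with the
partial index added later in this commit, the censor job can fetch uncensored redactions older
than the retention cutoff without scanning the whole redactions table. Table and column names
mirror the schema in this commit; the cutoff arithmetic and limit are assumptions.

    # Sketch only: select uncensored redactions received before the cutoff,
    # relying on the partial index on received_ts (WHERE NOT have_censored).
    before_ts = self._clock.time_msec() - retention_period_ms
    sql = """
        SELECT event_id, redacts FROM redactions
        WHERE NOT have_censored AND received_ts <= ?
        ORDER BY received_ts ASC
        LIMIT ?
    """
    rows = yield self._execute("censor_fetch_sketch", None, sql, before_ts, 100)
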
--- synapse/storage/events.py | 20 +++---- synapse/storage/events_bg_updates.py | 61 ++++++++++++++++++++++ .../storage/schema/delta/56/redaction_censor2.sql | 20 +++++++ 3 files changed, 92 insertions(+), 9 deletions(-) create mode 100644 synapse/storage/schema/delta/56/redaction_censor2.sql (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5d56ceeab3..3104815f1a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1397,12 +1397,8 @@ class EventsStore( self._simple_update_txn( txn, table="redactions", - keyvalues={ - "redacts": event.event_id, - }, - updatevalues={ - "have_censored": False, - } + keyvalues={"redacts": event.event_id}, + updatevalues={"have_censored": False}, ) def _store_rejected_events_txn(self, txn, events_and_contexts): @@ -1568,9 +1564,15 @@ class EventsStore( def _store_redaction(self, txn, event): # invalidate the cache for the redacted event txn.call_after(self._invalidate_get_event_cache, event.redacts) - txn.execute( - "INSERT INTO redactions (event_id, redacts) VALUES (?,?)", - (event.event_id, event.redacts), + + self._simple_insert_txn( + txn, + table="redactions", + values={ + "event_id": event.event_id, + "redacts": event.redacts, + "received_ts": self._clock.time_msec(), + }, ) @defer.inlineCallbacks diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 6587f31e2b..5717baf48c 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -67,6 +67,10 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update ) + self.register_background_update_handler( + "redactions_received_ts", self._redactions_received_ts + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -397,3 +401,60 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) return num_handled + + @defer.inlineCallbacks + def _redactions_received_ts(self, progress, batch_size): + """Handles filling out the `received_ts` column in redactions. + """ + last_event_id = progress.get("last_event_id", "") + + def _redactions_received_ts_txn(txn): + # Fetch the set of event IDs that we want to update + sql = """ + SELECT event_id FROM redactions + WHERE event_id > ? + ORDER BY event_id ASC + LIMIT ? + """ + + txn.execute(sql, (last_event_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + upper_event_id, = rows[-1] + + # Update the redactions with the received_ts. + # + # Note: Not all events have an associated received_ts, so we + # fallback to using origin_server_ts. If we for some reason don't + # have an origin_server_ts, lets just use the current timestamp. + # + # We don't want to leave it null, as then we'll never try and + # censor those redactions. + sql = """ + UPDATE redactions + SET received_ts = ( + SELECT COALESCE(received_ts, origin_server_ts, ?) FROM events + WHERE events.event_id = redactions.event_id + ) + WHERE ? <= event_id AND event_id <= ? 
+ """ + + txn.execute(sql, (self._clock.time_msec(), last_event_id, upper_event_id)) + + self._background_update_progress_txn( + txn, "redactions_received_ts", {"last_event_id": upper_event_id} + ) + + return len(rows) + + count = yield self.runInteraction( + "_redactions_received_ts", _redactions_received_ts_txn + ) + + if not count: + yield self._end_background_update("redactions_received_ts") + + return count diff --git a/synapse/storage/schema/delta/56/redaction_censor2.sql b/synapse/storage/schema/delta/56/redaction_censor2.sql new file mode 100644 index 0000000000..77a5eca499 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor2.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE redactions ADD COLUMN received_ts BIGINT; +CREATE INDEX redactions_have_censored_ts ON redactions(received_ts) WHERE not have_censored; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('redactions_received_ts', '{}'); -- cgit 1.4.1 From 5e8387af9e771ae42c7c8c4dc186000d862d3787 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Oct 2019 13:28:41 +0100 Subject: Use `received_ts` to find uncensored redacted events Joining against `events` and ordering by `stream_ordering` is inefficient as it forced scanning the entirety of the redactions table. This isn't the case if we use `redactions.received_ts` column as we can then use an index. --- synapse/storage/events.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3104815f1a..2e485c8644 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1589,36 +1589,29 @@ class EventsStore( if self.hs.config.redaction_retention_period is None: return - max_pos = yield self.find_first_stream_ordering_after_ts( - self._clock.time_msec() - self.hs.config.redaction_retention_period - ) + before_ts = self._clock.time_msec() - self.hs.config.redaction_retention_period # We fetch all redactions that: # 1. point to an event we have, - # 2. has a stream ordering from before the cut off, and + # 2. has a received_ts from before the cut off, and # 3. we haven't yet censored. # # This is limited to 100 events to ensure that we don't try and do too # much at once. We'll get called again so this should eventually catch # up. - # - # We use the range [-max_pos, max_pos] to handle backfilled events, - # which are given negative stream ordering. sql = """ - SELECT redact_event.event_id, redacts FROM redactions - INNER JOIN events AS redact_event USING (event_id) + SELECT redactions.event_id, redacts FROM redactions LEFT JOIN events AS original_event ON ( - redact_event.room_id = original_event.room_id - AND redacts = original_event.event_id + redacts = original_event.event_id ) WHERE NOT have_censored - AND ? <= redact_event.stream_ordering AND redact_event.stream_ordering <= ? 
- ORDER BY redact_event.stream_ordering ASC + AND redactions.received_ts <= ? + ORDER BY redactions.received_ts ASC LIMIT ? """ rows = yield self._execute( - "_censor_redactions_fetch", None, sql, -max_pos, max_pos, 100 + "_censor_redactions_fetch", None, sql, before_ts, 100 ) updates = [] -- cgit 1.4.1 From ce7a3e7e27ba4215ed86112d2967b0acca4cfb77 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 10:14:01 +0100 Subject: Fix fetching censored redactions from DB Fetching a censored redactions caused an exception due to the code expecting redactions to have a `redact` key, which redacted redactions don't have. --- synapse/storage/events_worker.py | 14 ++++++++++++++ tests/storage/test_redaction.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'synapse') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index c6fa7f82fd..57ce0304e9 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -238,6 +238,20 @@ class EventsWorkerStore(SQLBaseStore): # we have to recheck auth now. if not allow_rejected and entry.event.type == EventTypes.Redaction: + if not hasattr(entry.event, "redacts"): + # A redacted redaction doesn't have a `redacts` key, in + # which case lets just withhold the event. + # + # Note: Most of the time if the redactions has been + # redacted we still have the un-redacted event in the DB + # and so we'll still see the `redacts` key. However, this + # isn't always true e.g. if we have censored the event. + logger.debug( + "Withholding redaction event %s as we don't have redacts key", + event_id, + ) + continue + redacted_event_id = entry.event.redacts event_map = yield self._get_events_from_cache_or_db([redacted_event_id]) original_event_entry = event_map.get(redacted_event_id) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index deecfad9fb..427d3c49c5 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -118,6 +118,8 @@ class RedactionTestCase(unittest.HomeserverTestCase): self.get_success(self.store.persist_event(event, context)) + return event + def test_redact(self): self.get_success( self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) @@ -361,3 +363,37 @@ class RedactionTestCase(unittest.HomeserverTestCase): ) self.assert_dict({"content": {}}, json.loads(event_json)) + + def test_redact_redaction(self): + """Tests that we can redact a redaction and can fetch it again. + """ + + self.get_success( + self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) + ) + + msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t")) + + first_redact_event = self.get_success( + self.inject_redaction( + self.room1, msg_event.event_id, self.u_alice, "Redacting message" + ) + ) + + self.get_success( + self.inject_redaction( + self.room1, + first_redact_event.event_id, + self.u_alice, + "Redacting redaction", + ) + ) + + # Now lets jump to the future where we have censored the redaction event + # in the DB. + self.reactor.advance(60 * 60 * 24 * 31) + + # We just want to check that fetching the event doesn't raise an exception. + self.get_success( + self.store.get_event(first_redact_event.event_id, allow_none=True) + ) -- cgit 1.4.1 From f44f1d2e8374b7250a8a68cf3a49e6d1ac63b0fb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 10:36:27 +0100 Subject: Fix errors storing large retry intervals. 
We have set the max retry interval to a value larger than a postgres or sqlite int can hold, which caused exceptions when updating the destinations table. To fix postgres we need to change the column to a bigint, and for sqlite we lower the max interval to 2**62 (which is still incredibly long). --- .../56/destinations_retry_interval_type.sql.postgres | 18 ++++++++++++++++++ synapse/util/retryutils.py | 2 +- tests/storage/test_transactions.py | 11 +++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres (limited to 'synapse') diff --git a/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres b/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres new file mode 100644 index 0000000000..b9bbb18a91 --- /dev/null +++ b/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres @@ -0,0 +1,18 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We want to store large retry intervals so we upgrade the column from INT +-- to BIGINT. We don't need to do this on SQLite. +ALTER TABLE destinations ALTER retry_interval SET DATA TYPE BIGINT; diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index a5f2fbef5c..af69587196 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -29,7 +29,7 @@ MIN_RETRY_INTERVAL = 10 * 60 * 1000 RETRY_MULTIPLIER = 5 # a cap on the backoff. (Essentially none) -MAX_RETRY_INTERVAL = 2 ** 63 +MAX_RETRY_INTERVAL = 2 ** 62 class NotRetryingDestination(Exception): diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index a771d5af29..8e817e2c7f 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.util.retryutils import MAX_RETRY_INTERVAL + from tests.unittest import HomeserverTestCase @@ -45,3 +47,12 @@ class TransactionStoreTestCase(HomeserverTestCase): """ d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) self.get_success(d) + + def test_large_destination_retry(self): + d = self.store.set_destination_retry_timings( + "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL + ) + self.get_success(d) + + d = self.store.get_destination_retry_timings("example.com") + self.get_success(d) -- cgit 1.4.1 From 5705ecaec6b7a85c152691c79a4fa3526792a3eb Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 11:16:38 +0100 Subject: Don't 500 code when trying to exchange a revoked 3PID invite While this is not documented in the spec (but should be), Riot (and other clients) revoke 3PID invites by sending a m.room.third_party_invite event with an empty ({}) content to the room's state. 
When the invited 3PID gets associated with a MXID, the identity server (which doesn't know about revocations) sends down to the MXID's homeserver all of the undelivered invites it has for this 3PID. The homeserver then tries to talk to the inviting homeserver in order to exchange these invite for m.room.member events. When one of the invite is revoked, the inviting homeserver responds with a 500 error because it tries to extract a 'display_name' property from the content, which is empty. This might cause the invited server to consider that the server is down and not try to exchange other, valid invites (or at least delay it). This fix handles the case of revoked invites by avoiding trying to fetch a 'display_name' from the original invite's content, and letting the m.room.member event fail the auth rules (because, since the original invite's content is empty, it doesn't have public keys), which results in sending a 403 with the correct error message to the invited server. --- synapse/handlers/federation.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f72b81d419..a3d7739ead 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2599,8 +2599,19 @@ class FederationHandler(BaseHandler): original_invite_id, allow_none=True ) if original_invite: - display_name = original_invite.content["display_name"] - event_dict["content"]["third_party_invite"]["display_name"] = display_name + # If the m.room.third_party_invite event's content is empty, it means the + # invite has been revoked. + if original_invite.content: + display_name = original_invite.content["display_name"] + event_dict["content"]["third_party_invite"]["display_name"] = display_name + else: + # Don't discard or raise an error here because that's not the right place + # to do auth checks. The auth check will fail on this invite because we + # won't be able to fetch public keys from the m.room.third_party_invite + # event's content (because it's empty). + logger.info( + "Found invite event for third_party_invite but it has been revoked" + ) else: logger.info( "Could not find invite event for third_party_invite: %r", event_dict -- cgit 1.4.1 From d69fd53f74f693e0ec756eec479f3d51d93fd2aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 11:21:52 +0100 Subject: Bound find_next_generated_user_id DB query. We can easily bound the set of user IDs we pull out of the DB, so lets do that. --- synapse/storage/registration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 241a7be51e..1a859352b6 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -493,7 +493,9 @@ class RegistrationWorkerStore(SQLBaseStore): """ def _find_next_generated_user_id(txn): - txn.execute("SELECT name FROM users") + # We bound between '@1' and '@a' to avoid pulling the entire table + # out. 
+ txn.execute("SELECT name FROM users WHERE '@1' <= name AND name < '@a'") regex = re.compile(r"^@(\d+):") -- cgit 1.4.1 From 2a1470cd05558d4a3bc69a0bc5e8969ba8631426 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 2 Oct 2019 12:04:22 +0100 Subject: Fix yields and copy instead of move push rules on room upgrade (#6144) Copy push rules during a room upgrade from the old room to the new room, instead of deleting them from the old room. For instance, we've defined upgrading of a room multiple times to be possible, and push rules won't be transferred on the second upgrade if they're deleted during the first. Also fix some missing yields that probably broke things quite a bit. --- changelog.d/6144.bugfix | 1 + synapse/handlers/room_member.py | 4 ++-- synapse/storage/push_rule.py | 16 ++++++---------- 3 files changed, 9 insertions(+), 12 deletions(-) create mode 100644 changelog.d/6144.bugfix (limited to 'synapse') diff --git a/changelog.d/6144.bugfix b/changelog.d/6144.bugfix new file mode 100644 index 0000000000..eee63961e4 --- /dev/null +++ b/changelog.d/6144.bugfix @@ -0,0 +1 @@ +Prevent user push rules being deleted from a room when it is upgraded. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 8abdb1b6e6..95a244d86c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -216,8 +216,8 @@ class RoomMemberHandler(object): self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id ) - # Move over old push rules - self.store.move_push_rules_from_room_to_room_for_user( + # Copy over push rules + yield self.store.copy_push_rules_from_room_to_room_for_user( predecessor["room_id"], room_id, user_id ) elif event.membership == Membership.LEAVE: diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index a6517c4cf3..c4e24edff2 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -183,8 +183,8 @@ class PushRulesWorkerStore( return results @defer.inlineCallbacks - def move_push_rule_from_room_to_room(self, new_room_id, user_id, rule): - """Move a single push rule from one room to another for a specific user. + def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule): + """Copy a single push rule from one room to another for a specific user. Args: new_room_id (str): ID of the new room. @@ -209,14 +209,11 @@ class PushRulesWorkerStore( actions=rule["actions"], ) - # Delete push rule for the old room - yield self.delete_push_rule(user_id, rule["rule_id"]) - @defer.inlineCallbacks - def move_push_rules_from_room_to_room_for_user( + def copy_push_rules_from_room_to_room_for_user( self, old_room_id, new_room_id, user_id ): - """Move all of the push rules from one room to another for a specific + """Copy all of the push rules from one room to another for a specific user. 
Args: @@ -227,15 +224,14 @@ class PushRulesWorkerStore( # Retrieve push rules for this user user_push_rules = yield self.get_push_rules_for_user(user_id) - # Get rules relating to the old room, move them to the new room, then - # delete them from the old room + # Get rules relating to the old room and copy them to the new room for rule in user_push_rules: conditions = rule.get("conditions", []) if any( (c.get("key") == "room_id" and c.get("pattern") == old_room_id) for c in conditions ): - self.move_push_rule_from_room_to_room(new_room_id, user_id, rule) + yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): -- cgit 1.4.1 From 972c9f65d7ddf94ce024b57397d651d184cb2d26 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 12:17:46 +0100 Subject: Lint --- synapse/handlers/federation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a3d7739ead..75d79bb8e4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2603,7 +2603,9 @@ class FederationHandler(BaseHandler): # invite has been revoked. if original_invite.content: display_name = original_invite.content["display_name"] - event_dict["content"]["third_party_invite"]["display_name"] = display_name + event_dict["content"]["third_party_invite"][ + "display_name" + ] = display_name else: # Don't discard or raise an error here because that's not the right place # to do auth checks. The auth check will fail on this invite because we -- cgit 1.4.1 From 864f14454322c6cba11476667ade8fc6cbea6f44 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 2 Oct 2019 05:29:01 -0700 Subject: Fix up some typechecking (#6150) * type checking fixes * changelog --- .gitignore | 1 + changelog.d/6150.misc | 1 + synapse/api/errors.py | 3 ++- synapse/api/room_versions.py | 5 ++++- synapse/app/_base.py | 4 +++- synapse/config/appservice.py | 5 +++-- synapse/config/consent_config.py | 4 ++-- synapse/config/password_auth_providers.py | 4 +++- synapse/config/repository.py | 5 +++-- synapse/config/server.py | 10 +++++++--- synapse/config/server_notices_config.py | 4 ++-- synapse/logging/opentracing.py | 9 +++++---- synapse/logging/utils.py | 20 ++++++++++++++++---- synapse/metrics/__init__.py | 4 ++-- synapse/metrics/_exposition.py | 4 ++-- synapse/python_dependencies.py | 17 +++++++++++++---- synapse/types.py | 3 ++- synapse/util/async_helpers.py | 10 +++++++--- synapse/util/caches/__init__.py | 3 ++- synapse/util/caches/descriptors.py | 22 ++++++++++++++++++++-- synapse/util/caches/treecache.py | 4 +++- synapse/util/module_loader.py | 2 +- 22 files changed, 104 insertions(+), 40 deletions(-) create mode 100644 changelog.d/6150.misc (limited to 'synapse') diff --git a/.gitignore b/.gitignore index e53d4908d5..747b8714d7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ *.tac _trial_temp/ _trial_temp*/ +/out # stuff that is likely to exist when you run a server locally /*.db diff --git a/changelog.d/6150.misc b/changelog.d/6150.misc new file mode 100644 index 0000000000..a373c091ab --- /dev/null +++ b/changelog.d/6150.misc @@ -0,0 +1 @@ +Expand type-checking on modules imported by synapse.config. 
diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cf1ebf1af2..1bb2e86789 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -17,6 +17,7 @@ """Contains exceptions and error codes.""" import logging +from typing import Dict from six import iteritems from six.moves import http_client @@ -111,7 +112,7 @@ class ProxiedRequestError(SynapseError): def __init__(self, code, msg, errcode=Codes.UNKNOWN, additional_fields=None): super(ProxiedRequestError, self).__init__(code, msg, errcode) if additional_fields is None: - self._additional_fields = {} + self._additional_fields = {} # type: Dict else: self._additional_fields = dict(additional_fields) diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 95292b7dec..c6f50fd7b9 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -12,6 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from typing import Dict + import attr @@ -102,4 +105,4 @@ KNOWN_ROOM_VERSIONS = { RoomVersions.V4, RoomVersions.V5, ) -} # type: dict[str, RoomVersion] +} # type: Dict[str, RoomVersion] diff --git a/synapse/app/_base.py b/synapse/app/_base.py index c30fdeee9a..2ac7d5c064 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -263,7 +263,9 @@ def start(hs, listeners=None): refresh_certificate(hs) # Start the tracer - synapse.logging.opentracing.init_tracer(hs.config) + synapse.logging.opentracing.init_tracer( # type: ignore[attr-defined] # noqa + hs.config + ) # It is now safe to start your Synapse. hs.start_listening(listeners) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 8387ff6805..28d36b1bc3 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from typing import Dict from six import string_types from six.moves.urllib import parse as urlparse @@ -56,8 +57,8 @@ def load_appservices(hostname, config_files): return [] # Dicts of value -> filename - seen_as_tokens = {} - seen_ids = {} + seen_as_tokens = {} # type: Dict[str, str] + seen_ids = {} # type: Dict[str, str] appservices = [] diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 94916f3a49..48976e17b1 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -73,8 +73,8 @@ DEFAULT_CONFIG = """\ class ConsentConfig(Config): - def __init__(self): - super(ConsentConfig, self).__init__() + def __init__(self, *args): + super(ConsentConfig, self).__init__(*args) self.user_consent_version = None self.user_consent_template_dir = None diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 788c39c9fb..c50e244394 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Any, List + from synapse.util.module_loader import load_module from ._base import Config @@ -22,7 +24,7 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider" class PasswordAuthProviderConfig(Config): def read_config(self, config, **kwargs): - self.password_providers = [] + self.password_providers = [] # type: List[Any] providers = [] # We want to be backwards compatible with the old `ldap_config` diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 52e014608a..14740891f3 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -15,6 +15,7 @@ import os from collections import namedtuple +from typing import Dict, List from synapse.python_dependencies import DependencyException, check_requirements from synapse.util.module_loader import load_module @@ -61,7 +62,7 @@ def parse_thumbnail_requirements(thumbnail_sizes): Dictionary mapping from media type string to list of ThumbnailRequirement tuples. """ - requirements = {} + requirements = {} # type: Dict[str, List] for size in thumbnail_sizes: width = size["width"] height = size["height"] @@ -130,7 +131,7 @@ class ContentRepositoryConfig(Config): # # We don't create the storage providers here as not all workers need # them to be started. - self.media_storage_providers = [] + self.media_storage_providers = [] # type: List[tuple] for provider_config in storage_providers: # We special case the module "file_system" so as not to need to diff --git a/synapse/config/server.py b/synapse/config/server.py index 536ee7f29c..709bd387e5 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -19,6 +19,7 @@ import logging import os.path import re from textwrap import indent +from typing import List import attr import yaml @@ -243,7 +244,7 @@ class ServerConfig(Config): # events with profile information that differ from the target's global profile. self.allow_per_room_profiles = config.get("allow_per_room_profiles", True) - self.listeners = [] + self.listeners = [] # type: List[dict] for listener in config.get("listeners", []): if not isinstance(listener.get("port", None), int): raise ConfigError( @@ -287,7 +288,10 @@ class ServerConfig(Config): validator=attr.validators.instance_of(bool), default=False ) complexity = attr.ib( - validator=attr.validators.instance_of((int, float)), default=1.0 + validator=attr.validators.instance_of( + (float, int) # type: ignore[arg-type] # noqa + ), + default=1.0, ) complexity_error = attr.ib( validator=attr.validators.instance_of(str), @@ -366,7 +370,7 @@ class ServerConfig(Config): "cleanup_extremities_with_dummy_events", True ) - def has_tls_listener(self): + def has_tls_listener(self) -> bool: return any(l["tls"] for l in self.listeners) def generate_config_section( diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index eaac3d73bc..6d4285ef93 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -59,8 +59,8 @@ class ServerNoticesConfig(Config): None if server notices are not enabled. 
""" - def __init__(self): - super(ServerNoticesConfig, self).__init__() + def __init__(self, *args): + super(ServerNoticesConfig, self).__init__(*args) self.server_notices_mxid = None self.server_notices_mxid_display_name = None self.server_notices_mxid_avatar_url = None diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 308a27213b..cd1ff6a518 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -170,6 +170,7 @@ import inspect import logging import re from functools import wraps +from typing import Dict from canonicaljson import json @@ -547,7 +548,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T return span = opentracing.tracer.active_span - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): @@ -584,7 +585,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): span = opentracing.tracer.active_span - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): @@ -639,7 +640,7 @@ def get_active_span_text_map(destination=None): if destination and not whitelisted_homeserver(destination): return {} - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject( opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier ) @@ -653,7 +654,7 @@ def active_span_context_as_string(): Returns: The active span context encoded as a string. """ - carrier = {} + carrier = {} # type: Dict[str, str] if opentracing: opentracing.tracer.inject( opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py index 7df0fa6087..6073fc2725 100644 --- a/synapse/logging/utils.py +++ b/synapse/logging/utils.py @@ -119,7 +119,11 @@ def trace_function(f): logger = logging.getLogger(name) level = logging.DEBUG - s = inspect.currentframe().f_back + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + + s = frame.f_back to_print = [ "\t%s:%s %s. Args: args=%s, kwargs=%s" @@ -144,7 +148,7 @@ def trace_function(f): pathname=pathname, lineno=lineno, msg=msg, - args=None, + args=tuple(), exc_info=None, ) @@ -157,7 +161,12 @@ def trace_function(f): def get_previous_frames(): - s = inspect.currentframe().f_back.f_back + + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + + s = frame.f_back.f_back to_return = [] while s: if s.f_globals["__name__"].startswith("synapse"): @@ -174,7 +183,10 @@ def get_previous_frames(): def get_previous_frame(ignore=[]): - s = inspect.currentframe().f_back.f_back + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + s = frame.f_back.f_back while s: if s.f_globals["__name__"].startswith("synapse"): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index bec3b13397..0b45e1f52a 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -125,7 +125,7 @@ class InFlightGauge(object): ) # Counts number of in flight blocks for a given set of label values - self._registrations = {} + self._registrations = {} # type: Dict # Protects access to _registrations self._lock = threading.Lock() @@ -226,7 +226,7 @@ class BucketCollector(object): # Fetch the data -- this must be synchronous! 
data = self.data_collector() - buckets = {} + buckets = {} # type: Dict[float, int] res = [] for x in data.keys(): diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index 74d9c3ecd3..a248103191 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -36,9 +36,9 @@ from twisted.web.resource import Resource try: from prometheus_client.samples import Sample except ImportError: - Sample = namedtuple( + Sample = namedtuple( # type: ignore[no-redef] # noqa "Sample", ["name", "labels", "value", "timestamp", "exemplar"] - ) # type: ignore + ) CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8") diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 0bd563edc7..aa7da1c543 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import Set +from typing import List, Set from pkg_resources import ( DistributionNotFound, @@ -73,6 +73,7 @@ REQUIREMENTS = [ "netaddr>=0.7.18", "Jinja2>=2.9", "bleach>=1.4.3", + "typing-extensions>=3.7.4", ] CONDITIONAL_REQUIREMENTS = { @@ -144,7 +145,11 @@ def check_requirements(for_feature=None): deps_needed.append(dependency) errors.append( "Needed %s, got %s==%s" - % (dependency, e.dist.project_name, e.dist.version) + % ( + dependency, + e.dist.project_name, # type: ignore[attr-defined] # noqa + e.dist.version, # type: ignore[attr-defined] # noqa + ) ) except DistributionNotFound: deps_needed.append(dependency) @@ -159,7 +164,7 @@ def check_requirements(for_feature=None): if not for_feature: # Check the optional dependencies are up to date. We allow them to not be # installed. - OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) + OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) # type: List[str] for dependency in OPTS: try: @@ -168,7 +173,11 @@ def check_requirements(for_feature=None): deps_needed.append(dependency) errors.append( "Needed optional %s, got %s==%s" - % (dependency, e.dist.project_name, e.dist.version) + % ( + dependency, + e.dist.project_name, # type: ignore[attr-defined] # noqa + e.dist.version, # type: ignore[attr-defined] # noqa + ) ) except DistributionNotFound: # If it's not found, we don't care diff --git a/synapse/types.py b/synapse/types.py index 51eadb6ad4..8f79797f17 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -318,6 +318,7 @@ class StreamToken( ) ): _SEPARATOR = "_" + START = None # type: StreamToken @classmethod def from_string(cls, string): @@ -402,7 +403,7 @@ class RoomStreamToken(namedtuple("_StreamToken", "topological stream")): followed by the "stream_ordering" id of the event it comes after. """ - __slots__ = [] + __slots__ = [] # type: list @classmethod def parse(cls, string): diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index f1c46836b1..0d3bdd88ce 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -13,9 +13,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import collections import logging from contextlib import contextmanager +from typing import Dict, Sequence, Set, Union from six.moves import range @@ -213,7 +215,9 @@ class Linearizer(object): # the first element is the number of things executing, and # the second element is an OrderedDict, where the keys are deferreds for the # things blocked from executing. 
- self.key_to_defer = {} + self.key_to_defer = ( + {} + ) # type: Dict[str, Sequence[Union[int, Dict[defer.Deferred, int]]]] def queue(self, key): # we avoid doing defer.inlineCallbacks here, so that cancellation works correctly. @@ -340,10 +344,10 @@ class ReadWriteLock(object): def __init__(self): # Latest readers queued - self.key_to_current_readers = {} + self.key_to_current_readers = {} # type: Dict[str, Set[defer.Deferred]] # Latest writer queued - self.key_to_current_writer = {} + self.key_to_current_writer = {} # type: Dict[str, defer.Deferred] @defer.inlineCallbacks def read(self, key): diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index b50e3503f0..43fd65d693 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -16,6 +16,7 @@ import logging import os +from typing import Dict import six from six.moves import intern @@ -37,7 +38,7 @@ def get_cache_factor_for(cache_name): caches_by_name = {} -collectors_by_name = {} +collectors_by_name = {} # type: Dict cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"]) cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 43f66ec4be..5ac2530a6a 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -18,10 +18,12 @@ import inspect import logging import threading from collections import namedtuple +from typing import Any, cast from six import itervalues from prometheus_client import Gauge +from typing_extensions import Protocol from twisted.internet import defer @@ -37,6 +39,18 @@ from . import register_cache logger = logging.getLogger(__name__) +class _CachedFunction(Protocol): + invalidate = None # type: Any + invalidate_all = None # type: Any + invalidate_many = None # type: Any + prefill = None # type: Any + cache = None # type: Any + num_args = None # type: Any + + def __name__(self): + ... 
+ + cache_pending_metric = Gauge( "synapse_util_caches_cache_pending", "Number of lookups currently pending for this cache", @@ -245,7 +259,9 @@ class Cache(object): class _CacheDescriptorBase(object): - def __init__(self, orig, num_args, inlineCallbacks, cache_context=False): + def __init__( + self, orig: _CachedFunction, num_args, inlineCallbacks, cache_context=False + ): self.orig = orig if inlineCallbacks: @@ -404,7 +420,7 @@ class CacheDescriptor(_CacheDescriptorBase): return tuple(get_cache_key_gen(args, kwargs)) @functools.wraps(self.orig) - def wrapped(*args, **kwargs): + def _wrapped(*args, **kwargs): # If we're passed a cache_context then we'll want to call its invalidate() # whenever we are invalidated invalidate_callback = kwargs.pop("on_invalidate", None) @@ -440,6 +456,8 @@ class CacheDescriptor(_CacheDescriptorBase): return make_deferred_yieldable(observer) + wrapped = cast(_CachedFunction, _wrapped) + if self.num_args == 1: wrapped.invalidate = lambda key: cache.invalidate(key[0]) wrapped.prefill = lambda key, val: cache.prefill(key[0], val) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index 9a72218d85..2ea4e4e911 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -1,3 +1,5 @@ +from typing import Dict + from six import itervalues SENTINEL = object() @@ -12,7 +14,7 @@ class TreeCache(object): def __init__(self): self.size = 0 - self.root = {} + self.root = {} # type: Dict def __setitem__(self, key, value): return self.set(key, value) diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py index 7ff7eb1e4d..2705cbe5f8 100644 --- a/synapse/util/module_loader.py +++ b/synapse/util/module_loader.py @@ -54,5 +54,5 @@ def load_python_module(location: str): if spec is None: raise Exception("Unable to load module at %s" % (location,)) mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) + spec.loader.exec_module(mod) # type: ignore return mod -- cgit 1.4.1 From aec1377d0ba09628ffc399eb977ef507fecd1d28 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 2 Oct 2019 13:55:00 +0100 Subject: 1.4.0rc2 --- CHANGES.md | 18 ++++++++++++++++++ changelog.d/6117.misc | 1 - changelog.d/6135.bugfix | 1 - changelog.d/6141.bugfix | 1 - changelog.d/6145.bugfix | 1 - changelog.d/6146.bugfix | 1 - synapse/__init__.py | 2 +- 7 files changed, 19 insertions(+), 6 deletions(-) delete mode 100644 changelog.d/6117.misc delete mode 100644 changelog.d/6135.bugfix delete mode 100644 changelog.d/6141.bugfix delete mode 100644 changelog.d/6145.bugfix delete mode 100644 changelog.d/6146.bugfix (limited to 'synapse') diff --git a/CHANGES.md b/CHANGES.md index 0a0d0b3439..78322a08c1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,21 @@ +Synapse 1.4.0rc2 (2019-10-02) +============================= + +Bugfixes +-------- + +- Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres. ([\#6135](https://github.com/matrix-org/synapse/issues/6135)) +- Fix bad performance of censoring redactions background task. ([\#6141](https://github.com/matrix-org/synapse/issues/6141)) +- Fix fetching censored redactions from DB, which caused APIs like initial sync to fail if it tried to include the censored redaction. ([\#6145](https://github.com/matrix-org/synapse/issues/6145)) +- Fix exceptions when storing large retry intervals for down remote servers. 
([\#6146](https://github.com/matrix-org/synapse/issues/6146)) + + +Internal Changes +---------------- + +- Fix up sample config entry for `redaction_retention_period` option. ([\#6117](https://github.com/matrix-org/synapse/issues/6117)) + + Synapse 1.4.0rc1 (2019-09-26) ============================= diff --git a/changelog.d/6117.misc b/changelog.d/6117.misc deleted file mode 100644 index f8bdb58f41..0000000000 --- a/changelog.d/6117.misc +++ /dev/null @@ -1 +0,0 @@ -Fix up sample config entry for `redaction_retention_period` option. diff --git a/changelog.d/6135.bugfix b/changelog.d/6135.bugfix deleted file mode 100644 index 5f9f010cb1..0000000000 --- a/changelog.d/6135.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres. diff --git a/changelog.d/6141.bugfix b/changelog.d/6141.bugfix deleted file mode 100644 index c93920b7b5..0000000000 --- a/changelog.d/6141.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bad performance of censoring redactions background task. diff --git a/changelog.d/6145.bugfix b/changelog.d/6145.bugfix deleted file mode 100644 index 9e0eb5dd4c..0000000000 --- a/changelog.d/6145.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix fetching censored redactions from DB, which caused APIs like initial sync to fail if it tried to include the censored redaction. diff --git a/changelog.d/6146.bugfix b/changelog.d/6146.bugfix deleted file mode 100644 index 1dad801836..0000000000 --- a/changelog.d/6146.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix exceptions when storing large retry intervals for down remote servers. diff --git a/synapse/__init__.py b/synapse/__init__.py index ddfe9ec542..5197eea22e 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.4.0rc1" +__version__ = "1.4.0rc2" -- cgit 1.4.1 From a5166e4d5febc0e03ba9da9db99127a797a0bc4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 14:08:35 +0100 Subject: Land improved room list based on room stats (#6019) Use room_stats and room_state for room directory search --- changelog.d/6019.misc | 1 + synapse/federation/transport/server.py | 8 + synapse/handlers/room_list.py | 323 ++++++--------------- synapse/rest/client/v1/room.py | 8 + synapse/storage/room.py | 228 ++++++++++----- .../schema/delta/56/public_room_list_idx.sql | 16 + tests/handlers/test_roomlist.py | 39 --- 7 files changed, 273 insertions(+), 350 deletions(-) create mode 100644 changelog.d/6019.misc create mode 100644 synapse/storage/schema/delta/56/public_room_list_idx.sql delete mode 100644 tests/handlers/test_roomlist.py (limited to 'synapse') diff --git a/changelog.d/6019.misc b/changelog.d/6019.misc new file mode 100644 index 0000000000..dfee73c28f --- /dev/null +++ b/changelog.d/6019.misc @@ -0,0 +1 @@ +Improve performance of the public room list directory. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 7f8a16e355..0f16f21c2d 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -765,6 +765,10 @@ class PublicRoomList(BaseFederationServlet): else: network_tuple = ThirdPartyInstanceID(None, None) + if limit == 0: + # zero is a special value which corresponds to no limit. 
+ limit = None + data = await maybeDeferred( self.handler.get_local_public_room_list, limit, @@ -800,6 +804,10 @@ class PublicRoomList(BaseFederationServlet): if search_filter is None: logger.warning("Nonefilter") + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + data = await self.handler.get_local_public_room_list( limit=limit, since_token=since_token, diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index a7e55f00e5..4e1cc5460f 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -16,8 +16,7 @@ import logging from collections import namedtuple -from six import PY3, iteritems -from six.moves import range +from six import iteritems import msgpack from unpaddedbase64 import decode_base64, encode_base64 @@ -27,7 +26,6 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.api.errors import Codes, HttpResponseException from synapse.types import ThirdPartyInstanceID -from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.descriptors import cachedInlineCallbacks from synapse.util.caches.response_cache import ResponseCache @@ -37,7 +35,6 @@ logger = logging.getLogger(__name__) REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000 - # This is used to indicate we should only return rooms published to the main list. EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) @@ -72,6 +69,8 @@ class RoomListHandler(BaseHandler): This can be (None, None) to indicate the main list, or a particular appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. + from_federation (bool): true iff the request comes from the federation + API """ if not self.enable_room_list_search: return defer.succeed({"chunk": [], "total_room_count_estimate": 0}) @@ -133,239 +132,109 @@ class RoomListHandler(BaseHandler): from_federation (bool): Whether this request originated from a federating server or a client. Used for room filtering. timeout (int|None): Amount of seconds to wait for a response before - timing out. + timing out. TODO """ - if since_token and since_token != "END": - since_token = RoomListNextBatch.from_token(since_token) - else: - since_token = None - rooms_to_order_value = {} - rooms_to_num_joined = {} + # Pagination tokens work by storing the room ID sent in the last batch, + # plus the direction (forwards or backwards). Next batch tokens always + # go forwards, prev batch tokens always go backwards. - newly_visible = [] - newly_unpublished = [] if since_token: - stream_token = since_token.stream_ordering - current_public_id = yield self.store.get_current_public_room_stream_id() - public_room_stream_id = since_token.public_room_stream_id - newly_visible, newly_unpublished = yield self.store.get_public_room_changes( - public_room_stream_id, current_public_id, network_tuple=network_tuple - ) - else: - stream_token = yield self.store.get_room_max_stream_ordering() - public_room_stream_id = yield self.store.get_current_public_room_stream_id() - - room_ids = yield self.store.get_public_room_ids_at_stream_id( - public_room_stream_id, network_tuple=network_tuple - ) - - # We want to return rooms in a particular order: the number of joined - # users. We then arbitrarily use the room_id as a tie breaker. - - @defer.inlineCallbacks - def get_order_for_room(room_id): - # Most of the rooms won't have changed between the since token and - # now (especially if the since token is "now"). 
So, we can ask what - # the current users are in a room (that will hit a cache) and then - # check if the room has changed since the since token. (We have to - # do it in that order to avoid races). - # If things have changed then fall back to getting the current state - # at the since token. - joined_users = yield self.store.get_users_in_room(room_id) - if self.store.has_room_changed_since(room_id, stream_token): - latest_event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_token - ) - - if not latest_event_ids: - return + batch_token = RoomListNextBatch.from_token(since_token) - joined_users = yield self.state_handler.get_current_users_in_room( - room_id, latest_event_ids - ) - - num_joined_users = len(joined_users) - rooms_to_num_joined[room_id] = num_joined_users + last_room_id = batch_token.last_room_id + forwards = batch_token.direction_is_forward + else: + batch_token = None - if num_joined_users == 0: - return + last_room_id = None + forwards = True - # We want larger rooms to be first, hence negating num_joined_users - rooms_to_order_value[room_id] = (-num_joined_users, room_id) + # we request one more than wanted to see if there are more pages to come + probing_limit = limit + 1 if limit is not None else None - logger.info( - "Getting ordering for %i rooms since %s", len(room_ids), stream_token + results = yield self.store.get_largest_public_rooms( + network_tuple, + search_filter, + probing_limit, + last_room_id=last_room_id, + forwards=forwards, + ignore_non_federatable=from_federation, ) - yield concurrently_execute(get_order_for_room, room_ids, 10) - sorted_entries = sorted(rooms_to_order_value.items(), key=lambda e: e[1]) - sorted_rooms = [room_id for room_id, _ in sorted_entries] + def build_room_entry(room): + entry = { + "room_id": room["room_id"], + "name": room["name"], + "topic": room["topic"], + "canonical_alias": room["canonical_alias"], + "num_joined_members": room["joined_members"], + "avatar_url": room["avatar"], + "world_readable": room["history_visibility"] == "world_readable", + "guest_can_join": room["guest_access"] == "can_join", + } - # `sorted_rooms` should now be a list of all public room ids that is - # stable across pagination. Therefore, we can use indices into this - # list as our pagination tokens. + # Filter out Nones – rather omit the field altogether + return {k: v for k, v in entry.items() if v is not None} - # Filter out rooms that we don't want to return - rooms_to_scan = [ - r - for r in sorted_rooms - if r not in newly_unpublished and rooms_to_num_joined[r] > 0 - ] + results = [build_room_entry(r) for r in results] - total_room_count = len(rooms_to_scan) + response = {} + num_results = len(results) + if limit is not None: + more_to_come = num_results == probing_limit - if since_token: - # Filter out rooms we've already returned previously - # `since_token.current_limit` is the index of the last room we - # sent down, so we exclude it and everything before/after it. - if since_token.direction_is_forward: - rooms_to_scan = rooms_to_scan[since_token.current_limit + 1 :] + # Depending on direction we trim either the front or back. 
+ if forwards: + results = results[:limit] else: - rooms_to_scan = rooms_to_scan[: since_token.current_limit] - rooms_to_scan.reverse() - - logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - - # _append_room_entry_to_chunk will append to chunk but will stop if - # len(chunk) > limit - # - # Normally we will generate enough results on the first iteration here, - # but if there is a search filter, _append_room_entry_to_chunk may - # filter some results out, in which case we loop again. - # - # We don't want to scan over the entire range either as that - # would potentially waste a lot of work. - # - # XXX if there is no limit, we may end up DoSing the server with - # calls to get_current_state_ids for every single room on the - # server. Surely we should cap this somehow? - # - if limit: - step = limit + 1 + results = results[-limit:] else: - # step cannot be zero - step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 - - chunk = [] - for i in range(0, len(rooms_to_scan), step): - if timeout and self.clock.time() > timeout: - raise Exception("Timed out searching room directory") - - batch = rooms_to_scan[i : i + step] - logger.info("Processing %i rooms for result", len(batch)) - yield concurrently_execute( - lambda r: self._append_room_entry_to_chunk( - r, - rooms_to_num_joined[r], - chunk, - limit, - search_filter, - from_federation=from_federation, - ), - batch, - 5, - ) - logger.info("Now %i rooms in result", len(chunk)) - if len(chunk) >= limit + 1: - break - - chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"])) - - # Work out the new limit of the batch for pagination, or None if we - # know there are no more results that would be returned. - # i.e., [since_token.current_limit..new_limit] is the batch of rooms - # we've returned (or the reverse if we paginated backwards) - # We tried to pull out limit + 1 rooms above, so if we have <= limit - # then we know there are no more results to return - new_limit = None - if chunk and (not limit or len(chunk) > limit): - - if not since_token or since_token.direction_is_forward: - if limit: - chunk = chunk[:limit] - last_room_id = chunk[-1]["room_id"] + more_to_come = False + + if num_results > 0: + final_room_id = results[-1]["room_id"] + initial_room_id = results[0]["room_id"] + + if forwards: + if batch_token: + # If there was a token given then we assume that there + # must be previous results. 
+ response["prev_batch"] = RoomListNextBatch( + last_room_id=initial_room_id, direction_is_forward=False + ).to_token() + + if more_to_come: + response["next_batch"] = RoomListNextBatch( + last_room_id=final_room_id, direction_is_forward=True + ).to_token() else: - if limit: - chunk = chunk[-limit:] - last_room_id = chunk[0]["room_id"] - - new_limit = sorted_rooms.index(last_room_id) - - results = {"chunk": chunk, "total_room_count_estimate": total_room_count} - - if since_token: - results["new_rooms"] = bool(newly_visible) - - if not since_token or since_token.direction_is_forward: - if new_limit is not None: - results["next_batch"] = RoomListNextBatch( - stream_ordering=stream_token, - public_room_stream_id=public_room_stream_id, - current_limit=new_limit, - direction_is_forward=True, - ).to_token() - - if since_token: - results["prev_batch"] = since_token.copy_and_replace( - direction_is_forward=False, - current_limit=since_token.current_limit + 1, - ).to_token() - else: - if new_limit is not None: - results["prev_batch"] = RoomListNextBatch( - stream_ordering=stream_token, - public_room_stream_id=public_room_stream_id, - current_limit=new_limit, - direction_is_forward=False, - ).to_token() - - if since_token: - results["next_batch"] = since_token.copy_and_replace( - direction_is_forward=True, - current_limit=since_token.current_limit - 1, - ).to_token() - - return results - - @defer.inlineCallbacks - def _append_room_entry_to_chunk( - self, - room_id, - num_joined_users, - chunk, - limit, - search_filter, - from_federation=False, - ): - """Generate the entry for a room in the public room list and append it - to the `chunk` if it matches the search filter - - Args: - room_id (str): The ID of the room. - num_joined_users (int): The number of joined users in the room. - chunk (list) - limit (int|None): Maximum amount of rooms to display. Function will - return if length of chunk is greater than limit + 1. - search_filter (dict|None) - from_federation (bool): Whether this request originated from a - federating server or a client. Used for room filtering. - """ - if limit and len(chunk) > limit + 1: - # We've already got enough, so lets just drop it. - return + if batch_token: + response["next_batch"] = RoomListNextBatch( + last_room_id=final_room_id, direction_is_forward=True + ).to_token() + + if more_to_come: + response["prev_batch"] = RoomListNextBatch( + last_room_id=initial_room_id, direction_is_forward=False + ).to_token() + + for room in results: + # populate search result entries with additional fields, namely + # 'aliases' + room_id = room["room_id"] + + aliases = yield self.store.get_aliases_for_room(room_id) + if aliases: + room["aliases"] = aliases - result = yield self.generate_room_entry(room_id, num_joined_users) - if not result: - return + response["chunk"] = results - if from_federation and not result.get("m.federate", True): - # This is a room that other servers cannot join. Do not show them - # this room. 
- return + response["total_room_count_estimate"] = yield self.store.count_public_rooms( + network_tuple, ignore_non_federatable=from_federation + ) - if _matches_room_entry(result, search_filter): - chunk.append(result) + return response @cachedInlineCallbacks(num_args=1, cache_context=True) def generate_room_entry( @@ -580,32 +449,18 @@ class RoomListNextBatch( namedtuple( "RoomListNextBatch", ( - "stream_ordering", # stream_ordering of the first public room list - "public_room_stream_id", # public room stream id for first public room list - "current_limit", # The number of previous rooms returned + "last_room_id", # The room_id to get rooms after/before "direction_is_forward", # Bool if this is a next_batch, false if prev_batch ), ) ): - - KEY_DICT = { - "stream_ordering": "s", - "public_room_stream_id": "p", - "current_limit": "n", - "direction_is_forward": "d", - } + KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"} REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()} @classmethod def from_token(cls, token): - if PY3: - # The argument raw=False is only available on new versions of - # msgpack, and only really needed on Python 3. Gate it behind - # a PY3 check to avoid causing issues on Debian-packaged versions. - decoded = msgpack.loads(decode_base64(token), raw=False) - else: - decoded = msgpack.loads(decode_base64(token)) + decoded = msgpack.loads(decode_base64(token), raw=False) return RoomListNextBatch( **{cls.REVERSE_KEY_DICT[key]: val for key, val in decoded.items()} ) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 6bf924dedc..9c1d41421c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -361,6 +361,10 @@ class PublicRoomListRestServlet(TransactionRestServlet): limit = parse_integer(request, "limit", 0) since_token = parse_string(request, "since", None) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + handler = self.hs.get_room_list_handler() if server: data = yield handler.get_remote_public_room_list( @@ -398,6 +402,10 @@ class PublicRoomListRestServlet(TransactionRestServlet): else: network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + handler = self.hs.get_room_list_handler() if server: data = yield handler.get_remote_public_room_list( diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 08e13f3a3b..c02787a73d 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -63,103 +64,176 @@ class RoomWorkerStore(SQLBaseStore): desc="get_public_room_ids", ) - @cached(num_args=2, max_entries=100) - def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): - """Get pulbic rooms for a particular list, or across all lists. + def count_public_rooms(self, network_tuple, ignore_non_federatable): + """Counts the number of public rooms as tracked in the room_stats_current + and room_stats_state table. Args: - stream_id (int) - network_tuple (ThirdPartyInstanceID): The list to use (None, None) - means the main list, None means all lsits. 
+ network_tuple (ThirdPartyInstanceID|None) + ignore_non_federatable (bool): If true filters out non-federatable rooms """ - return self.runInteraction( - "get_public_room_ids_at_stream_id", - self.get_public_room_ids_at_stream_id_txn, - stream_id, - network_tuple=network_tuple, - ) - - def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, network_tuple): - return { - rm - for rm, vis in self.get_published_at_stream_id_txn( - txn, stream_id, network_tuple=network_tuple - ).items() - if vis - } - def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): - if network_tuple: - # We want to get from a particular list. No aggregation required. + def _count_public_rooms_txn(txn): + query_args = [] + + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? + """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list + """ sql = """ - SELECT room_id, visibility FROM public_room_list_stream - INNER JOIN ( - SELECT room_id, max(stream_id) AS stream_id - FROM public_room_list_stream - WHERE stream_id <= ? %s - GROUP BY room_id - ) grouped USING (room_id, stream_id) - """ + SELECT + COALESCE(COUNT(*), 0) + FROM ( + %(published_sql)s + ) published + INNER JOIN room_stats_state USING (room_id) + INNER JOIN room_stats_current USING (room_id) + WHERE + ( + join_rules = 'public' OR history_visibility = 'world_readable' + ) + AND joined_members > 0 + """ % { + "published_sql": published_sql + } - if network_tuple.appservice_id is not None: - txn.execute( - sql % ("AND appservice_id = ? AND network_id = ?",), - (stream_id, network_tuple.appservice_id, network_tuple.network_id), - ) - else: - txn.execute(sql % ("AND appservice_id IS NULL",), (stream_id,)) - return dict(txn) - else: - # We want to get from all lists, so we need to aggregate the results + txn.execute(sql, query_args) + return txn.fetchone()[0] - logger.info("Executing full list") + return self.runInteraction("count_public_rooms", _count_public_rooms_txn) - sql = """ - SELECT room_id, visibility - FROM public_room_list_stream - INNER JOIN ( - SELECT - room_id, max(stream_id) AS stream_id, appservice_id, - network_id - FROM public_room_list_stream - WHERE stream_id <= ? - GROUP BY room_id, appservice_id, network_id - ) grouped USING (room_id, stream_id) - """ + @defer.inlineCallbacks + def get_largest_public_rooms( + self, + network_tuple, + search_filter, + limit, + last_room_id, + forwards, + ignore_non_federatable=False, + ): + """Gets the largest public rooms (where largest is in terms of joined + members, as tracked in the statistics table). - txn.execute(sql, (stream_id,)) + Args: + network_tuple (ThirdPartyInstanceID|None): + search_filter (dict|None): + limit (int|None): Maxmimum number of rows to return, unlimited otherwise. + last_room_id (str|None): if present, a room ID which bounds the + result set, and is always *excluded* from the result set. + forwards (bool): true iff going forwards, going backwards otherwise + ignore_non_federatable (bool): If true filters out non-federatable rooms. - results = {} - # A room is visible if its visible on any list. 
- for room_id, visibility in txn: - results[room_id] = bool(visibility) or results.get(room_id, False) + Returns: + Rooms in order: biggest number of joined users first. + We then arbitrarily use the room_id as a tie breaker. - return results + """ - def get_public_room_changes(self, prev_stream_id, new_stream_id, network_tuple): - def get_public_room_changes_txn(txn): - then_rooms = self.get_public_room_ids_at_stream_id_txn( - txn, prev_stream_id, network_tuple - ) + where_clauses = [] + query_args = [] - now_rooms_dict = self.get_published_at_stream_id_txn( - txn, new_stream_id, network_tuple - ) + if last_room_id: + if forwards: + where_clauses.append("room_id < ?") + else: + where_clauses.append("? < room_id") - now_rooms_visible = set(rm for rm, vis in now_rooms_dict.items() if vis) - now_rooms_not_visible = set( - rm for rm, vis in now_rooms_dict.items() if not vis + query_args += [last_room_id] + + if search_filter and search_filter.get("generic_search_term", None): + search_term = "%" + search_filter["generic_search_term"] + "%" + + where_clauses.append( + """ + ( + name LIKE ? + OR topic LIKE ? + OR canonical_alias LIKE ? + ) + """ ) + query_args += [search_term, search_term, search_term] + + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? + """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list + """ - newly_visible = now_rooms_visible - then_rooms - newly_unpublished = now_rooms_not_visible & then_rooms + where_clause = "" + if where_clauses: + where_clause = " AND " + " AND ".join(where_clauses) + + sql = """ + SELECT + room_id, name, topic, canonical_alias, joined_members, + avatar, history_visibility, joined_members, guest_access + FROM ( + %(published_sql)s + ) published + INNER JOIN room_stats_state USING (room_id) + INNER JOIN room_stats_current USING (room_id) + WHERE + ( + join_rules = 'public' OR history_visibility = 'world_readable' + ) + AND joined_members > 0 + %(where_clause)s + ORDER BY joined_members %(dir)s, room_id %(dir)s + """ % { + "published_sql": published_sql, + "where_clause": where_clause, + "dir": "DESC" if forwards else "ASC", + } - return newly_visible, newly_unpublished + if limit is not None: + query_args.append(limit) - return self.runInteraction( - "get_public_room_changes", get_public_room_changes_txn + sql += """ + LIMIT ? + """ + + def _get_largest_public_rooms_txn(txn): + txn.execute(sql, query_args) + + results = self.cursor_to_dict(txn) + + if not forwards: + results.reverse() + + return results + + ret_val = yield self.runInteraction( + "get_largest_public_rooms", _get_largest_public_rooms_txn ) + defer.returnValue(ret_val) @cached(max_entries=10000) def is_room_blocked(self, room_id): diff --git a/synapse/storage/schema/delta/56/public_room_list_idx.sql b/synapse/storage/schema/delta/56/public_room_list_idx.sql new file mode 100644 index 0000000000..7be31ffebb --- /dev/null +++ b/synapse/storage/schema/delta/56/public_room_list_idx.sql @@ -0,0 +1,16 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE INDEX public_room_list_stream_network ON public_room_list_stream (appservice_id, network_id, room_id); diff --git a/tests/handlers/test_roomlist.py b/tests/handlers/test_roomlist.py deleted file mode 100644 index 61eebb6985..0000000000 --- a/tests/handlers/test_roomlist.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.handlers.room_list import RoomListNextBatch - -import tests.unittest -import tests.utils - - -class RoomListTestCase(tests.unittest.TestCase): - """ Tests RoomList's RoomListNextBatch. """ - - def setUp(self): - pass - - def test_check_read_batch_tokens(self): - batch_token = RoomListNextBatch( - stream_ordering="abcdef", - public_room_stream_id="123", - current_limit=20, - direction_is_forward=True, - ).to_token() - next_batch = RoomListNextBatch.from_token(batch_token) - self.assertEquals(next_batch.stream_ordering, "abcdef") - self.assertEquals(next_batch.public_room_stream_id, "123") - self.assertEquals(next_batch.current_limit, 20) - self.assertEquals(next_batch.direction_is_forward, True) -- cgit 1.4.1 From 03cf4385e098ae73730b9c5ef695fa3f16c1806f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:09:10 +0100 Subject: Fix public room list pagination. We incorrectly used `room_id` as to bound the result set, even though we order by `joined_members, room_id`, leading to incorrect results after pagination. 
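A hand-written illustration of the bug described above (invented room IDs, not code from the patch): once results are ordered by `joined_members` and then `room_id`, several rooms can share a member count, so the continuation token has to carry both columns, which is what the change below threads through `RoomListNextBatch` and the SQL `WHERE` clause.

rooms = [("!c:hs", 50), ("!b:hs", 50), ("!a:hs", 50), ("!z:hs", 10)]
# Already in query order: joined_members DESC, then room_id DESC.

def next_page(last_joined_members, last_room_id, limit=2):
    # The keyset bound compares the (joined_members, room_id) pair.
    page = [
        (room_id, members)
        for room_id, members in rooms
        if members < last_joined_members
        or (members == last_joined_members and room_id < last_room_id)
    ]
    return page[:limit]

# The first page of two rows ends at ("!b:hs", 50); continuing from it:
print(next_page(50, "!b:hs"))  # [('!a:hs', 50), ('!z:hs', 10)]
# Bounding on room_id alone (room_id < "!b:hs") would wrongly skip "!z:hs".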
--- synapse/handlers/room_list.py | 33 +++++++++++++++++++-------- synapse/storage/room.py | 53 +++++++++++++++++++++++++++++-------------- 2 files changed, 59 insertions(+), 27 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 4e1cc5460f..cfed344d4d 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -142,12 +142,12 @@ class RoomListHandler(BaseHandler): if since_token: batch_token = RoomListNextBatch.from_token(since_token) - last_room_id = batch_token.last_room_id + bounds = (batch_token.last_joined_members, batch_token.last_room_id) forwards = batch_token.direction_is_forward else: batch_token = None + bounds = None - last_room_id = None forwards = True # we request one more than wanted to see if there are more pages to come @@ -157,7 +157,7 @@ class RoomListHandler(BaseHandler): network_tuple, search_filter, probing_limit, - last_room_id=last_room_id, + bounds=bounds, forwards=forwards, ignore_non_federatable=from_federation, ) @@ -193,30 +193,38 @@ class RoomListHandler(BaseHandler): more_to_come = False if num_results > 0: - final_room_id = results[-1]["room_id"] - initial_room_id = results[0]["room_id"] + final_entry = results[-1] + initial_entry = results[0] if forwards: if batch_token: # If there was a token given then we assume that there # must be previous results. response["prev_batch"] = RoomListNextBatch( - last_room_id=initial_room_id, direction_is_forward=False + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, ).to_token() if more_to_come: response["next_batch"] = RoomListNextBatch( - last_room_id=final_room_id, direction_is_forward=True + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, ).to_token() else: if batch_token: response["next_batch"] = RoomListNextBatch( - last_room_id=final_room_id, direction_is_forward=True + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, ).to_token() if more_to_come: response["prev_batch"] = RoomListNextBatch( - last_room_id=initial_room_id, direction_is_forward=False + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, ).to_token() for room in results: @@ -449,12 +457,17 @@ class RoomListNextBatch( namedtuple( "RoomListNextBatch", ( + "last_joined_members", # The count to get rooms after/before "last_room_id", # The room_id to get rooms after/before "direction_is_forward", # Bool if this is a next_batch, false if prev_batch ), ) ): - KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"} + KEY_DICT = { + "last_joined_members": "m", + "last_room_id": "r", + "direction_is_forward": "d", + } REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()} diff --git a/synapse/storage/room.py b/synapse/storage/room.py index c02787a73d..9b7e31583c 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -17,6 +17,7 @@ import collections import logging import re +from typing import Optional, Tuple from canonicaljson import json @@ -25,6 +26,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore +from synapse.types import ThirdPartyInstanceID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks logger = 
logging.getLogger(__name__) @@ -119,24 +121,25 @@ class RoomWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_largest_public_rooms( self, - network_tuple, - search_filter, - limit, - last_room_id, - forwards, - ignore_non_federatable=False, + network_tuple: Optional[ThirdPartyInstanceID], + search_filter: Optional[dict], + limit: Optional[int], + bounds: Optional[Tuple[int, str]], + forwards: bool, + ignore_non_federatable: bool = False, ): """Gets the largest public rooms (where largest is in terms of joined members, as tracked in the statistics table). Args: - network_tuple (ThirdPartyInstanceID|None): - search_filter (dict|None): - limit (int|None): Maxmimum number of rows to return, unlimited otherwise. - last_room_id (str|None): if present, a room ID which bounds the - result set, and is always *excluded* from the result set. - forwards (bool): true iff going forwards, going backwards otherwise - ignore_non_federatable (bool): If true filters out non-federatable rooms. + network_tuple + search_filter + limit: Maxmimum number of rows to return, unlimited otherwise. + bounds: An uppoer or lower bound to apply to result set if given, + consists of a joined member count and room_id (these are + excluded from result set). + forwards: true iff going forwards, going backwards otherwise + ignore_non_federatable: If true filters out non-federatable rooms. Returns: Rooms in order: biggest number of joined users first. @@ -147,13 +150,29 @@ class RoomWorkerStore(SQLBaseStore): where_clauses = [] query_args = [] - if last_room_id: + # Work out the bounds if we're given them, these bounds look slightly + # odd, but are designed to help query planner use indices by pulling + # out a common bound. + if bounds: + last_joined_members, last_room_id = bounds if forwards: - where_clauses.append("room_id < ?") + where_clauses.append( + """ + joined_members <= ? AND ( + joined_members < ? OR room_id < ? + ) + """ + ) else: - where_clauses.append("? < room_id") + where_clauses.append( + """ + joined_members >= ? AND ( + joined_members > ? OR room_id > ? 
+ ) + """ + ) - query_args += [last_room_id] + query_args += [last_joined_members, last_joined_members, last_room_id] if search_filter and search_filter.get("generic_search_term", None): search_term = "%" + search_filter["generic_search_term"] + "%" -- cgit 1.4.1 From 4c4f44930d2153056dc1b992c571f43f2d360d07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:20:36 +0100 Subject: Fix not showing non-federatable rooms to remote room list queries --- synapse/storage/room.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9b7e31583c..615c0d3f65 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -174,6 +174,9 @@ class RoomWorkerStore(SQLBaseStore): query_args += [last_joined_members, last_joined_members, last_room_id] + if ignore_non_federatable: + where_clauses.append("is_federatable") + if search_filter and search_filter.get("generic_search_term", None): search_term = "%" + search_filter["generic_search_term"] + "%" -- cgit 1.4.1 From 7a5f080f91f42fe011a1ab497acdce481187da5f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:47:22 +0100 Subject: Fix appservice room list pagination --- synapse/storage/room.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9b7e31583c..70bd719521 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -150,6 +150,24 @@ class RoomWorkerStore(SQLBaseStore): where_clauses = [] query_args = [] + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? + """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list + """ + # Work out the bounds if we're given them, these bounds look slightly # odd, but are designed to help query planner use indices by pulling # out a common bound. @@ -188,24 +206,6 @@ class RoomWorkerStore(SQLBaseStore): ) query_args += [search_term, search_term, search_term] - if network_tuple: - if network_tuple.appservice_id: - published_sql = """ - SELECT room_id from appservice_room_list - WHERE appservice_id = ? AND network_id = ? 
- """ - query_args.append(network_tuple.appservice_id) - query_args.append(network_tuple.network_id) - else: - published_sql = """ - SELECT room_id FROM rooms WHERE is_public - """ - else: - published_sql = """ - SELECT room_id FROM rooms WHERE is_public - UNION SELECT room_id from appservice_room_list - """ - where_clause = "" if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) -- cgit 1.4.1 From 6527fa18c1e6f9bcb22318916b5e9534c91c84c1 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 14:44:58 +0100 Subject: Add test case --- synapse/handlers/federation.py | 2 +- tests/handlers/test_federation.py | 83 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/handlers/test_federation.py (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 75d79bb8e4..91f3a69298 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2570,7 +2570,7 @@ class FederationHandler(BaseHandler): ) try: - self.auth.check_from_context(room_version, event, context) + yield self.auth.check_from_context(room_version, event, context) except AuthError as e: logger.warn("Denying third party invite %r because %s", event, e) raise e diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py new file mode 100644 index 0000000000..20416a0142 --- /dev/null +++ b/tests/handlers/test_federation.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.api.constants import EventTypes +from synapse.api.errors import AuthError, Codes +from synapse.rest import admin +from synapse.rest.client.v1 import login, room + +from tests import unittest + + +class FederationTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver(http_client=None) + self.handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + return hs + + def test_exchange_revoked_invite(self): + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + + room_id = self.helper.create_room_as( + room_creator=user_id, tok=tok + ) + + # Send a 3PID invite event with an empty body so it's considered as a revoked one. 
+ invite_token = "sometoken" + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.ThirdPartyInvite, + state_key=invite_token, + body={}, + tok=tok, + ) + + d = self.handler.on_exchange_third_party_invite_request( + room_id=room_id, + event_dict={ + "type": EventTypes.Member, + "room_id": room_id, + "sender": user_id, + "state_key": "@someone:example.org", + "content": { + "membership": "invite", + "third_party_invite": { + "display_name": "alice", + "signed": { + "mxid": "@alice:localhost", + "token": invite_token, + "signatures": { + "magic.forest": { + "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg" + } + } + } + } + } + } + ) + + failure = self.get_failure(d, AuthError).value + + self.assertEqual(failure.code, 403, failure) + self.assertEqual(failure.errcode, Codes.FORBIDDEN, failure) + self.assertEqual(failure.msg, "You are not invited to this room.") -- cgit 1.4.1 From 0f46bf5737012bb09b40f8e71c5f6db84125df8f Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 3 Oct 2019 12:57:26 +0100 Subject: Replace client_secret with in server logs (#6158) Replace `client_secret` query parameter values with `` in the logs. Prevents a scenario where a MITM of server traffic can horde 3pids on their account. --- changelog.d/6158.bugfix | 1 + synapse/http/__init__.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6158.bugfix (limited to 'synapse') diff --git a/changelog.d/6158.bugfix b/changelog.d/6158.bugfix new file mode 100644 index 0000000000..6b48fce05e --- /dev/null +++ b/changelog.d/6158.bugfix @@ -0,0 +1 @@ +Redact `client_secret` in server logs. diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 3acf772cd1..3880ce0d94 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -42,11 +42,13 @@ def cancelled_to_request_timed_out_error(value, timeout): ACCESS_TOKEN_RE = re.compile(r"(\?.*access(_|%5[Ff])token=)[^&]*(.*)$") +CLIENT_SECRET_RE = re.compile(r"(\?.*client(_|%5[Ff])secret=)[^&]*(.*)$") def redact_uri(uri): - """Strips access tokens from the uri replaces with """ - return ACCESS_TOKEN_RE.sub(r"\1\3", uri) + """Strips sensitive information from the uri replaces with """ + uri = ACCESS_TOKEN_RE.sub(r"\1\3", uri) + return CLIENT_SECRET_RE.sub(r"\1\3", uri) class QuieterFileBodyProducer(FileBodyProducer): -- cgit 1.4.1 From ecb69d824a39d420a20a1c0b24a7174cea392560 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 3 Oct 2019 13:22:44 +0100 Subject: 1.4.0 --- CHANGES.md | 9 +++++++++ changelog.d/6158.bugfix | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) delete mode 100644 changelog.d/6158.bugfix (limited to 'synapse') diff --git a/CHANGES.md b/CHANGES.md index 78322a08c1..165e1d4db4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.4.0 (2019-10-03) +========================== + +Bugfixes +-------- + +- Redact `client_secret` in server logs. ([\#6158](https://github.com/matrix-org/synapse/issues/6158)) + + Synapse 1.4.0rc2 (2019-10-02) ============================= diff --git a/changelog.d/6158.bugfix b/changelog.d/6158.bugfix deleted file mode 100644 index 6b48fce05e..0000000000 --- a/changelog.d/6158.bugfix +++ /dev/null @@ -1 +0,0 @@ -Redact `client_secret` in server logs. 
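The redact_uri change in #6158 above chains two substitutions over the request URI. A small illustrative sketch of that behaviour; the patterns mirror the patch, and the redaction placeholder is assumed to be a literal marker such as "<redacted>":

import re

ACCESS_TOKEN_RE = re.compile(r"(\?.*access(_|%5[Ff])token=)[^&]*(.*)$")
CLIENT_SECRET_RE = re.compile(r"(\?.*client(_|%5[Ff])secret=)[^&]*(.*)$")

def redact_uri(uri):
    # Replace only the query-string value, keeping everything before the "="
    # and any parameters that follow the "&".
    uri = ACCESS_TOKEN_RE.sub(r"\1<redacted>\3", uri)
    return CLIENT_SECRET_RE.sub(r"\1<redacted>\3", uri)

print(redact_uri("/requestToken?client_secret=abc123&send_attempt=1"))
# -> /requestToken?client_secret=<redacted>&send_attempt=1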
diff --git a/debian/changelog b/debian/changelog index 76efc442d7..60c682cc57 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.4.0) stable; urgency=medium + + * New synapse release 1.4.0. + + -- Synapse Packaging team Thu, 03 Oct 2019 13:22:25 +0100 + matrix-synapse-py3 (1.3.1) stable; urgency=medium * New synapse release 1.3.1. diff --git a/synapse/__init__.py b/synapse/__init__.py index 5197eea22e..2d52d26af5 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.4.0rc2" +__version__ = "1.4.0" -- cgit 1.4.1 From c8145af8a9446911bbb52fc0114eebf4eebede4b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:11:04 +0100 Subject: Cache room membership lookups in _get_joined_users_from_context --- synapse/storage/roommember.py | 64 ++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 19 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4df8ebdacd..f11bbd05f8 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -16,6 +16,7 @@ import logging from collections import namedtuple +from typing import Iterable from six import iteritems, itervalues @@ -32,7 +33,7 @@ from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList from synapse.util.stringutils import to_ascii logger = logging.getLogger(__name__) @@ -567,25 +568,10 @@ class RoomMemberWorkerStore(EventsWorkerStore): missing_member_event_ids.append(event_id) if missing_member_event_ids: - rows = yield self._simple_select_many_batch( - table="room_memberships", - column="event_id", - iterable=missing_member_event_ids, - retcols=("user_id", "display_name", "avatar_url"), - keyvalues={"membership": Membership.JOIN}, - batch_size=500, - desc="_get_joined_users_from_context", - ) - - users_in_room.update( - { - to_ascii(row["user_id"]): ProfileInfo( - avatar_url=to_ascii(row["avatar_url"]), - display_name=to_ascii(row["display_name"]), - ) - for row in rows - } + event_to_memberships = yield self._get_membership_from_event_ids( + missing_member_event_ids ) + users_in_room.update((row for row in event_to_memberships.values() if row)) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: @@ -597,6 +583,46 @@ class RoomMemberWorkerStore(EventsWorkerStore): return users_in_room + @cached(max_entries=10000) + def _get_membership_from_event_id(self, event_id): + raise NotADirectoryError() + + @cachedList( + cached_method_name="_get_membership_from_event_id", + list_name="event_ids", + inlineCallbacks=True, + ) + def _get_membership_from_event_ids(self, event_ids: Iterable[str]): + """Lookup profile info for set of member event IDs. + + Args: + event_ids: The member event IDs to lookup + + Returns: + Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID + to `user_id` and ProfileInfo (or None if couldn't find event). 
+ """ + + rows = yield self._simple_select_many_batch( + table="room_memberships", + column="event_id", + iterable=event_ids, + retcols=("user_id", "display_name", "avatar_url"), + keyvalues={"membership": Membership.JOIN}, + batch_size=500, + desc="_get_membership_from_event_ids", + ) + + return { + row["event_id"]: ( + row["user_id"], + ProfileInfo( + avatar_url=row["avatar_url"], display_name=row["display_name"] + ), + ) + for row in rows + } + @cachedInlineCallbacks(max_entries=10000) def is_host_joined(self, room_id, host): if "%" in host or "_" in host: -- cgit 1.4.1 From d89ebf7c25b4f55d513da41a3ea20b9f8adc62d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:23:11 +0100 Subject: cachedList descriptor doesn't like typing --- synapse/storage/roommember.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index f11bbd05f8..ef6179cbe5 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -592,11 +592,11 @@ class RoomMemberWorkerStore(EventsWorkerStore): list_name="event_ids", inlineCallbacks=True, ) - def _get_membership_from_event_ids(self, event_ids: Iterable[str]): + def _get_membership_from_event_ids(self, event_ids): """Lookup profile info for set of member event IDs. Args: - event_ids: The member event IDs to lookup + event_ids (Iterable[str]): The member event IDs to lookup Returns: Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID -- cgit 1.4.1 From a9610cdf02e04ecd8df52ebe74b1cb1338a5be97 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:26:56 +0100 Subject: Fixup names and comments --- synapse/storage/roommember.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index ef6179cbe5..37c0723eb6 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -568,7 +568,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): missing_member_event_ids.append(event_id) if missing_member_event_ids: - event_to_memberships = yield self._get_membership_from_event_ids( + event_to_memberships = yield self._get_joined_profiles_from_event_ids( missing_member_event_ids ) users_in_room.update((row for row in event_to_memberships.values() if row)) @@ -584,23 +584,24 @@ class RoomMemberWorkerStore(EventsWorkerStore): return users_in_room @cached(max_entries=10000) - def _get_membership_from_event_id(self, event_id): + def _get_joined_profile_from_event_id(self, event_id): raise NotADirectoryError() @cachedList( - cached_method_name="_get_membership_from_event_id", + cached_method_name="_get_joined_profile_from_event_id", list_name="event_ids", inlineCallbacks=True, ) - def _get_membership_from_event_ids(self, event_ids): - """Lookup profile info for set of member event IDs. + def _get_joined_profiles_from_event_ids(self, event_ids): + """For given set of member event_ids check if they point to a join + event and if so return the associated user and profile info. Args: event_ids (Iterable[str]): The member event IDs to lookup Returns: Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID - to `user_id` and ProfileInfo (or None if couldn't find event). + to `user_id` and ProfileInfo (or None if not join event). 
""" rows = yield self._simple_select_many_batch( -- cgit 1.4.1 From 84691da6c3058f265f8b86d9a6592ba8ce90e2ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:27:18 +0100 Subject: pep8 --- synapse/storage/roommember.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 37c0723eb6..ed7d936b3d 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -16,7 +16,6 @@ import logging from collections import namedtuple -from typing import Iterable from six import iteritems, itervalues -- cgit 1.4.1 From 91f61fc6d74e923822eb92933e0d028848285a40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:28:31 +0100 Subject: Use the right error.... --- synapse/storage/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index ed7d936b3d..1160b98ccc 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -584,7 +584,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(max_entries=10000) def _get_joined_profile_from_event_id(self, event_id): - raise NotADirectoryError() + raise NotImplementedError() @cachedList( cached_method_name="_get_joined_profile_from_event_id", -- cgit 1.4.1 From 693156aaf4579f48ad265f42f90b1bd73feda129 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:33:54 +0100 Subject: Don't regenerate numeric user ID if registration fails. This causes huge amounts of DB IO if registrations start to fail e.g. because the DB is struggling with IO. --- synapse/handlers/register.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 06bd03b77c..6dd7ef3745 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -220,7 +220,7 @@ class RegistrationHandler(BaseHandler): attempts = 0 user = None while not user: - localpart = yield self._generate_user_id(attempts > 0) + localpart = yield self._generate_user_id() user = UserID(localpart, self.hs.hostname) user_id = user.to_string() yield self.check_user_id_not_appservice_exclusive(user_id) @@ -379,10 +379,10 @@ class RegistrationHandler(BaseHandler): ) @defer.inlineCallbacks - def _generate_user_id(self, reseed=False): - if reseed or self._next_generated_user_id is None: + def _generate_user_id(self): + if self._next_generated_user_id is None: with (yield self._generate_user_id_linearizer.queue(())): - if reseed or self._next_generated_user_id is None: + if self._next_generated_user_id is None: self._next_generated_user_id = ( yield self.store.find_next_generated_user_id_localpart() ) -- cgit 1.4.1 From ab8a64772b9e663e74fbdafef9a729bd49369e65 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:42:32 +0100 Subject: Remove unused variable --- synapse/handlers/register.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 6dd7ef3745..53410f120b 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -217,7 +217,6 @@ class RegistrationHandler(BaseHandler): else: # autogen a sequential user ID - attempts = 0 user = None while not user: localpart = yield self._generate_user_id() @@ -238,7 +237,6 @@ class RegistrationHandler(BaseHandler): # if user id is taken, just generate another user = 
None user_id = None - attempts += 1 if not self.hs.config.user_consent_at_registration: yield self._auto_join_rooms(user_id) -- cgit 1.4.1 From 66537e10ce77e47fac52e3f27569ac1ef0f1aaa3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 3 Oct 2019 17:47:20 +0100 Subject: add some metrics on the federation sender (#6160) --- changelog.d/6160.misc | 1 + synapse/federation/sender/__init__.py | 11 ++++++----- synapse/state/__init__.py | 24 ++++++++++++++++++------ synapse/storage/roommember.py | 21 +++++++++++++++------ synapse/util/metrics.py | 6 ++++-- 5 files changed, 44 insertions(+), 19 deletions(-) create mode 100644 changelog.d/6160.misc (limited to 'synapse') diff --git a/changelog.d/6160.misc b/changelog.d/6160.misc new file mode 100644 index 0000000000..3d7cce00e1 --- /dev/null +++ b/changelog.d/6160.misc @@ -0,0 +1 @@ +Add some metrics on the federation sender. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index d46f4aaeb1..2b2ee8612a 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -38,7 +38,7 @@ from synapse.metrics import ( events_processed_counter, ) from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.util.metrics import measure_func +from synapse.util.metrics import Measure, measure_func logger = logging.getLogger(__name__) @@ -183,8 +183,8 @@ class FederationSender(object): # Otherwise if the last member on a server in a room is # banned then it won't receive the event because it won't # be in the room after the ban. - destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=event.prev_event_ids() + destinations = yield self.state.get_hosts_in_room_at_events( + event.room_id, event_ids=event.prev_event_ids() ) except Exception: logger.exception( @@ -207,8 +207,9 @@ class FederationSender(object): @defer.inlineCallbacks def handle_room_events(events): - for event in events: - yield handle_event(event) + with Measure(self.clock, "handle_room_events"): + for event in events: + yield handle_event(event) events_by_room = {} for event in events: diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 2b0f4c79ee..dc9f5a9008 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,7 @@ from synapse.state import v1, v2 from synapse.util.async_helpers import Linearizer from synapse.util.caches import get_cache_factor_for from synapse.util.caches.expiringcache import ExpiringCache -from synapse.util.metrics import Measure +from synapse.util.metrics import Measure, measure_func logger = logging.getLogger(__name__) @@ -191,11 +191,22 @@ class StateHandler(object): return joined_users @defer.inlineCallbacks - def get_current_hosts_in_room(self, room_id, latest_event_ids=None): - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + def get_current_hosts_in_room(self, room_id): + event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + return (yield self.get_hosts_in_room_at_events(room_id, event_ids)) + + @defer.inlineCallbacks + def get_hosts_in_room_at_events(self, room_id, event_ids): + """Get the hosts that were in a room at the given event ids + + Args: + room_id (str): + event_ids (list[str]): + + 
Returns: + Deferred[list[str]]: the hosts in the room at the given events + """ + entry = yield self.resolve_state_groups_for_events(room_id, event_ids) joined_hosts = yield self.store.get_joined_hosts(room_id, entry) return joined_hosts @@ -344,6 +355,7 @@ class StateHandler(object): return context + @measure_func() @defer.inlineCallbacks def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4df8ebdacd..1550d827ba 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -33,6 +33,7 @@ from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.metrics import Measure from synapse.util.stringutils import to_ascii logger = logging.getLogger(__name__) @@ -483,6 +484,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) return result + @defer.inlineCallbacks def get_joined_users_from_state(self, room_id, state_entry): state_group = state_entry.state_group if not state_group: @@ -492,9 +494,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_users_from_context( - room_id, state_group, state_entry.state, context=state_entry - ) + with Measure(self._clock, "get_joined_users_from_state"): + return ( + yield self._get_joined_users_from_context( + room_id, state_group, state_entry.state, context=state_entry + ) + ) @cachedInlineCallbacks( num_args=2, cache_context=True, iterable=True, max_entries=100000 @@ -669,6 +674,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True + @defer.inlineCallbacks def get_joined_hosts(self, room_id, state_entry): state_group = state_entry.state_group if not state_group: @@ -678,9 +684,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_hosts( - room_id, state_group, state_entry.state, state_entry=state_entry - ) + with Measure(self._clock, "get_joined_hosts"): + return ( + yield self._get_joined_hosts( + room_id, state_group, state_entry.state, state_entry=state_entry + ) + ) @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True) # @defer.inlineCallbacks diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 0910930c21..4b1bcdf23c 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -60,12 +60,14 @@ in_flight = InFlightGauge( ) -def measure_func(name): +def measure_func(name=None): def wrapper(func): + block_name = func.__name__ if name is None else name + @wraps(func) @defer.inlineCallbacks def measured_func(self, *args, **kwargs): - with Measure(self.clock, name): + with Measure(self.clock, block_name): r = yield func(self, *args, **kwargs) return r -- cgit 1.4.1 From 39b40d6d9989b09de39da5b6d3f85ee535e41138 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 4 Oct 2019 10:34:52 +0200 Subject: media/thumbnailer: Better quality for 1-bit / 8-bit color palette images (#2142) Pillow will use nearest neighbour as the resampling algorithm if the source image is either 1-bit or a color palette using 8 bits. If we convert to RGB before scaling, we'll probably get a better result. 
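A small sketch of the fix described above, assuming Pillow is available; the file name and target size are placeholders:

from PIL import Image

def make_thumbnail(path, size=(320, 240)):
    image = Image.open(path)
    # Palette ("P") and 1-bit ("1") images are converted to RGB first so the
    # resize uses a proper resampling filter rather than nearest neighbour.
    if image.mode in ("1", "P"):
        image = image.convert("RGB")
    return image.resize(size, Image.ANTIALIAS)

# make_thumbnail("palette.gif").save("thumb.png")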
--- changelog.d/2142.feature | 1 + synapse/rest/media/v1/thumbnailer.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 changelog.d/2142.feature (limited to 'synapse') diff --git a/changelog.d/2142.feature b/changelog.d/2142.feature new file mode 100644 index 0000000000..e21e8325e1 --- /dev/null +++ b/changelog.d/2142.feature @@ -0,0 +1 @@ +Improve quality of thumbnails for 1-bit/8-bit color palette images. diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index c995d7e043..8cf415e29d 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -82,13 +82,21 @@ class Thumbnailer(object): else: return (max_height * self.width) // self.height, max_height + def _resize(self, width, height): + # 1-bit or 8-bit color palette images need converting to RGB + # otherwise they will be scaled using nearest neighbour which + # looks awful + if self.image.mode in ["1", "P"]: + self.image = self.image.convert("RGB") + return self.image.resize((width, height), Image.ANTIALIAS) + def scale(self, width, height, output_type): """Rescales the image to the given dimensions. Returns: BytesIO: the bytes of the encoded image ready to be written to disk """ - scaled = self.image.resize((width, height), Image.ANTIALIAS) + scaled = self._resize(width, height) return self._encode_image(scaled, output_type) def crop(self, width, height, output_type): @@ -107,13 +115,13 @@ class Thumbnailer(object): """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width - scaled_image = self.image.resize((width, scaled_height), Image.ANTIALIAS) + scaled_image = self._resize(width, scaled_height) crop_top = (scaled_height - height) // 2 crop_bottom = height + crop_top cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) else: scaled_width = (height * self.width) // self.height - scaled_image = self.image.resize((scaled_width, height), Image.ANTIALIAS) + scaled_image = self._resize(scaled_width, height) crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) -- cgit 1.4.1 From 13c4345c844e75b0d1a4ce66e4fb2eb9820cb7f6 Mon Sep 17 00:00:00 2001 From: Alexander Maznev Date: Fri, 4 Oct 2019 04:34:16 -0500 Subject: Update `user_filters` table to have a unique index, and non-null columns (#1172) --- changelog.d/1172.misc | 1 + .../schema/delta/56/unique_user_filter_index.py | 46 ++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 changelog.d/1172.misc create mode 100644 synapse/storage/schema/delta/56/unique_user_filter_index.py (limited to 'synapse') diff --git a/changelog.d/1172.misc b/changelog.d/1172.misc new file mode 100644 index 0000000000..30b3e56082 --- /dev/null +++ b/changelog.d/1172.misc @@ -0,0 +1 @@ +Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this. 
\ No newline at end of file diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py new file mode 100644 index 0000000000..4efc1a586f --- /dev/null +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -0,0 +1,46 @@ +import logging + +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + select_clause = """ + CREATE TEMPORARY TABLE user_filters_migration AS + SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json + FROM user_filters; + """ + else: + select_clause = """ + CREATE TEMPORARY TABLE user_filters_migration AS + SELECT * FROM user_filters GROUP BY user_id, filter_id; + """ + sql = ( + """ + BEGIN; + %s + DROP INDEX user_filters_by_user_id_filter_id; + DELETE FROM user_filters; + ALTER TABLE user_filters + ALTER COLUMN user_id SET NOT NULL + ALTER COLUMN filter_id SET NOT NULL + ALTER COLUMN filter_json SET NOT NULL; + INSERT INTO user_filters(user_id, filter_id, filter_json) + SELECT * FROM user_filters_migration; + DROP TABLE user_filters_migration; + CREATE UNIQUE INDEX user_filters_by_user_id_filter_id_unique + ON user_filters(user_id, filter_id); + END; + """ + % select_clause + ) + if isinstance(database_engine, PostgresEngine): + cur.execute(sql) + else: + cur.executescript(sql) + + +def run_create(cur, database_engine, *args, **kwargs): + pass -- cgit 1.4.1 From 81d51ce48b449ee55bdcc17b76050283d848d405 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Oct 2019 11:16:19 +0100 Subject: Incorporate review --- synapse/handlers/federation.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 91f3a69298..58a1654885 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2600,20 +2600,14 @@ class FederationHandler(BaseHandler): ) if original_invite: # If the m.room.third_party_invite event's content is empty, it means the - # invite has been revoked. - if original_invite.content: - display_name = original_invite.content["display_name"] - event_dict["content"]["third_party_invite"][ - "display_name" - ] = display_name - else: - # Don't discard or raise an error here because that's not the right place - # to do auth checks. The auth check will fail on this invite because we - # won't be able to fetch public keys from the m.room.third_party_invite - # event's content (because it's empty). - logger.info( - "Found invite event for third_party_invite but it has been revoked" - ) + # invite has been revoked. In this case, we don't have to raise an error here + # because the auth check will fail on the invite (because it's not able to + # fetch public keys from the m.room.third_party_invite event's content, which + # is empty. 
+ display_name = original_invite.content.get("display_name") + event_dict["content"]["third_party_invite"][ + "display_name" + ] = display_name else: logger.info( "Could not find invite event for third_party_invite: %r", event_dict -- cgit 1.4.1 From 4676732ca061ede3089b9c9b97a6d6b523b8c8e0 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Oct 2019 11:18:28 +0100 Subject: Lint --- synapse/handlers/federation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 58a1654885..b2b3a7e221 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2605,9 +2605,7 @@ class FederationHandler(BaseHandler): # fetch public keys from the m.room.third_party_invite event's content, which # is empty. display_name = original_invite.content.get("display_name") - event_dict["content"]["third_party_invite"][ - "display_name" - ] = display_name + event_dict["content"]["third_party_invite"]["display_name"] = display_name else: logger.info( "Could not find invite event for third_party_invite: %r", event_dict -- cgit 1.4.1 From 21d51ab59852d6a4d504a6ccd79ad82070c03a12 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Oct 2019 11:21:24 +0100 Subject: Typo --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b2b3a7e221..50fc0fde2a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2603,7 +2603,7 @@ class FederationHandler(BaseHandler): # invite has been revoked. In this case, we don't have to raise an error here # because the auth check will fail on the invite (because it's not able to # fetch public keys from the m.room.third_party_invite event's content, which - # is empty. + # is empty). 
display_name = original_invite.content.get("display_name") event_dict["content"]["third_party_invite"]["display_name"] = display_name else: -- cgit 1.4.1 From 5119a4cac7f42691beb455eedbefd99a39d64897 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Oct 2019 12:21:17 +0100 Subject: Fix bug where we didn't pull out event ID --- synapse/storage/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 81a9ab6dc8..59a89fad60 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -612,7 +612,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): table="room_memberships", column="event_id", iterable=event_ids, - retcols=("user_id", "display_name", "avatar_url"), + retcols=("user_id", "display_name", "avatar_url", "event_id"), keyvalues={"membership": Membership.JOIN}, batch_size=500, desc="_get_membership_from_event_ids", -- cgit 1.4.1 From c8e6c308c6358f20b1d0ef1292f8e9e2f36a549e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 7 Oct 2019 13:15:35 +0100 Subject: Fix unique_user_filter_index schema update --- synapse/storage/schema/delta/56/unique_user_filter_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py index 4efc1a586f..60031f23ca 100644 --- a/synapse/storage/schema/delta/56/unique_user_filter_index.py +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -24,8 +24,8 @@ def run_upgrade(cur, database_engine, *args, **kwargs): DROP INDEX user_filters_by_user_id_filter_id; DELETE FROM user_filters; ALTER TABLE user_filters - ALTER COLUMN user_id SET NOT NULL - ALTER COLUMN filter_id SET NOT NULL + ALTER COLUMN user_id SET NOT NULL, + ALTER COLUMN filter_id SET NOT NULL, ALTER COLUMN filter_json SET NOT NULL; INSERT INTO user_filters(user_id, filter_id, filter_json) SELECT * FROM user_filters_migration; -- cgit 1.4.1 From 276ae5c63eaef656d486e190298f7a5ec99a7a5b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 7 Oct 2019 14:41:39 +0100 Subject: add some logging to the rooms stats updates, to try to track down a flaky test (#6167) --- changelog.d/6167.misc | 1 + synapse/handlers/stats.py | 1 + synapse/storage/stats.py | 3 +++ 3 files changed, 5 insertions(+) create mode 100644 changelog.d/6167.misc (limited to 'synapse') diff --git a/changelog.d/6167.misc b/changelog.d/6167.misc new file mode 100644 index 0000000000..32c96b3681 --- /dev/null +++ b/changelog.d/6167.misc @@ -0,0 +1 @@ +Add some logging to the rooms stats updates, to try to track down a flaky test. 
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index cbac7c347a..c62b113115 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -293,6 +293,7 @@ class StatsHandler(StateDeltasHandler): room_state["guest_access"] = event_content.get("guest_access") for room_id, state in room_to_state_updates.items(): + logger.info("Updating room_stats_state for %s: %s", room_id, state) yield self.store.update_room_state(room_id, state) return room_to_stats_deltas, user_to_stats_deltas diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 09190d684e..7c224cd3d9 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -332,6 +332,9 @@ class StatsStore(StateDeltasStore): def _bulk_update_stats_delta_txn(txn): for stats_type, stats_updates in updates.items(): for stats_id, fields in stats_updates.items(): + logger.info( + "Updating %s stats for %s: %s", stats_type, stats_id, fields + ) self._update_stats_delta_txn( txn, ts=ts, -- cgit 1.4.1 From dc795ba709f2ffe41671d25d94f21d4b31a5301d Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Mon, 7 Oct 2019 15:41:25 +0100 Subject: Log responder we are using. (#6139) This prevents us logging "Responding to media request with responder %s". --- changelog.d/6139.misc | 1 + synapse/rest/media/v1/_base.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6139.misc (limited to 'synapse') diff --git a/changelog.d/6139.misc b/changelog.d/6139.misc new file mode 100644 index 0000000000..d4b65e7af8 --- /dev/null +++ b/changelog.d/6139.misc @@ -0,0 +1 @@ +Log responder when responding to media request. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 5fefee4dde..65bbf00073 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -195,7 +195,7 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam respond_404(request) return - logger.debug("Responding to media request with responder %s") + logger.debug("Responding to media request with responder %s", responder) add_file_headers(request, media_type, file_size, upload_name) try: with responder: -- cgit 1.4.1 From 88957199e7edbd33a66d419224df9ba0eb9e604d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:16:39 +0100 Subject: Move client_ips's bg updates to a dedicated store --- synapse/storage/client_ips.py | 200 ++++++++++++++++++++++-------------------- 1 file changed, 106 insertions(+), 94 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index bb135166ce..1d89b50f57 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -33,14 +33,14 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 -class ClientIpStore(background_updates.BackgroundUpdateStore): +class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): self.client_ip_last_seen = Cache( name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR ) - super(ClientIpStore, self).__init__(db_conn, hs) + super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs) self.user_ips_max_age = hs.config.user_ips_max_age @@ -92,19 +92,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "devices_last_seen", self._devices_last_seen_update ) - # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) - 
self._batch_row_update = {} - - self._client_ip_looper = self._clock.looping_call( - self._update_client_ips_batch, 5 * 1000 - ) - self.hs.get_reactor().addSystemEventTrigger( - "before", "shutdown", self._update_client_ips_batch - ) - - if self.user_ips_max_age: - self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) - @defer.inlineCallbacks def _remove_user_ip_nonunique(self, progress, batch_size): def f(conn): @@ -303,6 +290,110 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): return batch_size + @defer.inlineCallbacks + def _devices_last_seen_update(self, progress, batch_size): + """Background update to insert last seen info into devices table + """ + + last_user_id = progress.get("last_user_id", "") + last_device_id = progress.get("last_device_id", "") + + def _devices_last_seen_update_txn(txn): + # This consists of two queries: + # + # 1. The sub-query searches for the next N devices and joins + # against user_ips to find the max last_seen associated with + # that device. + # 2. The outer query then joins again against user_ips on + # user/device/last_seen. This *should* hopefully only + # return one row, but if it does return more than one then + # we'll just end up updating the same device row multiple + # times, which is fine. + + if self.database_engine.supports_tuple_comparison: + where_clause = "(user_id, device_id) > (?, ?)" + where_args = [last_user_id, last_device_id] + else: + # We explicitly do a `user_id >= ? AND (...)` here to ensure + # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` + # makes it hard for query optimiser to tell that it can use the + # index on user_id + where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)" + where_args = [last_user_id, last_user_id, last_device_id] + + sql = """ + SELECT + last_seen, ip, user_agent, user_id, device_id + FROM ( + SELECT + user_id, device_id, MAX(u.last_seen) AS last_seen + FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE %(where_clause)s + GROUP BY user_id, device_id + ORDER BY user_id ASC, device_id ASC + LIMIT ? + ) c + INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) + """ % { + "where_clause": where_clause + } + txn.execute(sql, where_args + [batch_size]) + + rows = txn.fetchall() + if not rows: + return 0 + + sql = """ + UPDATE devices + SET last_seen = ?, ip = ?, user_agent = ? + WHERE user_id = ? AND device_id = ? 
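The paging in _devices_last_seen_update above is keyset pagination over (user_id, device_id). Where the engine lacks row-value comparisons, the expanded bound selects the same next page as "(user_id, device_id) > (?, ?)". A small self-contained illustration with toy data:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE devices (user_id TEXT, device_id TEXT)")
conn.executemany(
    "INSERT INTO devices VALUES (?, ?)",
    [("@a:hs", "D1"), ("@a:hs", "D2"), ("@b:hs", "D1"), ("@c:hs", "D9")],
)

last_user_id, last_device_id = "@a:hs", "D2"
rows = conn.execute(
    """
    SELECT user_id, device_id FROM devices
    WHERE user_id >= ? AND (user_id > ? OR device_id > ?)
    ORDER BY user_id ASC, device_id ASC
    LIMIT 2
    """,
    (last_user_id, last_user_id, last_device_id),
).fetchall()
print(rows)  # [('@b:hs', 'D1'), ('@c:hs', 'D9')]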
+ """ + txn.execute_batch(sql, rows) + + _, _, _, user_id, device_id = rows[-1] + self._background_update_progress_txn( + txn, + "devices_last_seen", + {"last_user_id": user_id, "last_device_id": device_id}, + ) + + return len(rows) + + updated = yield self.runInteraction( + "_devices_last_seen_update", _devices_last_seen_update_txn + ) + + if not updated: + yield self._end_background_update("devices_last_seen") + + return updated + + +class ClientIpStore(ClientIpBackgroundUpdateStore): + def __init__(self, db_conn, hs): + + self.client_ip_last_seen = Cache( + name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR + ) + + super(ClientIpStore, self).__init__(db_conn, hs) + + self.user_ips_max_age = hs.config.user_ips_max_age + + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) + self._batch_row_update = {} + + self._client_ip_looper = self._clock.looping_call( + self._update_client_ips_batch, 5 * 1000 + ) + self.hs.get_reactor().addSystemEventTrigger( + "before", "shutdown", self._update_client_ips_batch + ) + + if self.user_ips_max_age: + self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) + @defer.inlineCallbacks def insert_client_ip( self, user_id, access_token, ip, user_agent, device_id, now=None @@ -454,85 +545,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): for (access_token, ip), (user_agent, last_seen) in iteritems(results) ) - @defer.inlineCallbacks - def _devices_last_seen_update(self, progress, batch_size): - """Background update to insert last seen info into devices table - """ - - last_user_id = progress.get("last_user_id", "") - last_device_id = progress.get("last_device_id", "") - - def _devices_last_seen_update_txn(txn): - # This consists of two queries: - # - # 1. The sub-query searches for the next N devices and joins - # against user_ips to find the max last_seen associated with - # that device. - # 2. The outer query then joins again against user_ips on - # user/device/last_seen. This *should* hopefully only - # return one row, but if it does return more than one then - # we'll just end up updating the same device row multiple - # times, which is fine. - - if self.database_engine.supports_tuple_comparison: - where_clause = "(user_id, device_id) > (?, ?)" - where_args = [last_user_id, last_device_id] - else: - # We explicitly do a `user_id >= ? AND (...)` here to ensure - # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` - # makes it hard for query optimiser to tell that it can use the - # index on user_id - where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)" - where_args = [last_user_id, last_user_id, last_device_id] - - sql = """ - SELECT - last_seen, ip, user_agent, user_id, device_id - FROM ( - SELECT - user_id, device_id, MAX(u.last_seen) AS last_seen - FROM devices - INNER JOIN user_ips AS u USING (user_id, device_id) - WHERE %(where_clause)s - GROUP BY user_id, device_id - ORDER BY user_id ASC, device_id ASC - LIMIT ? - ) c - INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) - """ % { - "where_clause": where_clause - } - txn.execute(sql, where_args + [batch_size]) - - rows = txn.fetchall() - if not rows: - return 0 - - sql = """ - UPDATE devices - SET last_seen = ?, ip = ?, user_agent = ? - WHERE user_id = ? AND device_id = ? 
- """ - txn.execute_batch(sql, rows) - - _, _, _, user_id, device_id = rows[-1] - self._background_update_progress_txn( - txn, - "devices_last_seen", - {"last_user_id": user_id, "last_device_id": device_id}, - ) - - return len(rows) - - updated = yield self.runInteraction( - "_devices_last_seen_update", _devices_last_seen_update_txn - ) - - if not updated: - yield self._end_background_update("devices_last_seen") - - return updated - @wrap_as_background_process("prune_old_user_ips") async def _prune_old_user_ips(self): """Removes entries in user IPs older than the configured period. -- cgit 1.4.1 From 2d3b4f42f029da54132b26119e9ec0d902505eef Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:19:25 +0100 Subject: Move deviceinbox's bg updates to a dedicated store --- synapse/storage/deviceinbox.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index 6b7458304e..70bc2bb2cc 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -208,11 +208,11 @@ class DeviceInboxWorkerStore(SQLBaseStore): ) -class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): +class DeviceInboxBackgroundUpdateStore(BackgroundUpdateStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" def __init__(self, db_conn, hs): - super(DeviceInboxStore, self).__init__(db_conn, hs) + super(DeviceInboxBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "device_inbox_stream_index", @@ -225,6 +225,26 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) + @defer.inlineCallbacks + def _background_drop_index_device_inbox(self, progress, batch_size): + def reindex_txn(conn): + txn = conn.cursor() + txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id") + txn.close() + + yield self.runWithConnection(reindex_txn) + + yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID) + + return 1 + + +class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): + DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" + + def __init__(self, db_conn, hs): + super(DeviceInboxStore, self).__init__(db_conn, hs) + # Map of (user_id, device_id) to the last stream_id that has been # deleted up to. This is so that we can no op deletions. 
self._last_device_delete_cache = ExpiringCache( @@ -435,16 +455,3 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): return self.runInteraction( "get_all_new_device_messages", get_all_new_device_messages_txn ) - - @defer.inlineCallbacks - def _background_drop_index_device_inbox(self, progress, batch_size): - def reindex_txn(conn): - txn = conn.cursor() - txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id") - txn.close() - - yield self.runWithConnection(reindex_txn) - - yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID) - - return 1 -- cgit 1.4.1 From cef9f6753e51c1fb7b24f2122b0d0ada767d6e08 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:24:03 +0100 Subject: Move devices's bg updates to a dedicated store --- synapse/storage/devices.py | 49 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 79a58df591..111bfb3d64 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -512,17 +512,9 @@ class DeviceWorkerStore(SQLBaseStore): return results -class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): +class DeviceBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(DeviceStore, self).__init__(db_conn, hs) - - # Map of (user_id, device_id) -> bool. If there is an entry that implies - # the device exists. - self.device_id_exists_cache = Cache( - name="device_id_exists", keylen=2, max_entries=10000 - ) - - self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000) + super(DeviceBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "device_lists_stream_idx", @@ -555,6 +547,31 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): self._drop_device_list_streams_non_unique_indexes, ) + @defer.inlineCallbacks + def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id") + txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id") + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) + return 1 + + +class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(DeviceStore, self).__init__(db_conn, hs) + + # Map of (user_id, device_id) -> bool. If there is an entry that implies + # the device exists. 
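The several "Move ... bg updates to a dedicated store" commits in this series all follow one shape, sketched below with illustrative stand-in classes rather than the real Synapse ones: background-update registration moves into a *BackgroundUpdateStore base class, and the full store simply combines it with the worker store, keeping runtime-only state on the full store.

class BackgroundUpdateStore:
    def __init__(self):
        self.updates = {}

    def register_background_update_handler(self, name, handler):
        self.updates[name] = handler

class DeviceInboxWorkerStore:
    def get_new_messages_for_device(self):
        return []

class DeviceInboxBackgroundUpdateStore(BackgroundUpdateStore):
    def __init__(self):
        super().__init__()
        self.register_background_update_handler(
            "device_inbox_stream_drop", lambda progress, batch_size: 1
        )

class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
    def __init__(self):
        super().__init__()
        # runtime-only state (caches, looping calls) stays on the full store
        self._last_device_delete_cache = {}

store = DeviceInboxStore()
print(list(store.updates))  # ['device_inbox_stream_drop']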
+ self.device_id_exists_cache = Cache( + name="device_id_exists", keylen=2, max_entries=10000 + ) + + self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000) + @defer.inlineCallbacks def store_device(self, user_id, device_id, initial_device_display_name): """Ensure the given device is known; add it to the store if not @@ -910,15 +927,3 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "_prune_old_outbound_device_pokes", _prune_txn, ) - - @defer.inlineCallbacks - def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): - def f(conn): - txn = conn.cursor() - txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id") - txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id") - txn.close() - - yield self.runWithConnection(f) - yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) - return 1 -- cgit 1.4.1 From 54f87e07342ddbf82e9b8ee7c7ce227624c2f97b Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:30:22 +0100 Subject: Move media_repository's bg updates to a dedicated store --- synapse/storage/media_repository.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6b1238ce4a..2eb2740d07 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -15,11 +15,10 @@ from synapse.storage.background_updates import BackgroundUpdateStore -class MediaRepositoryStore(BackgroundUpdateStore): - """Persistence for attachments and avatars""" +class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(MediaRepositoryStore, self).__init__(db_conn, hs) + super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( update_name="local_media_repository_url_idx", @@ -29,6 +28,13 @@ class MediaRepositoryStore(BackgroundUpdateStore): where_clause="url_cache IS NOT NULL", ) + +class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): + """Persistence for attachments and avatars""" + + def __init__(self, db_conn, hs): + super(MediaRepositoryStore, self).__init__(db_conn, hs) + def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: -- cgit 1.4.1 From 81e6ffb536b19c662a81736f88ef2f243d425532 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:44:26 +0100 Subject: Move registration's bg updates to a dedicated store --- synapse/storage/registration.py | 198 +++++++++++++++++++++------------------- 1 file changed, 103 insertions(+), 95 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 1a859352b6..1f6c93b73b 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -37,7 +37,57 @@ THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 logger = logging.getLogger(__name__) -class RegistrationWorkerStore(SQLBaseStore): +class RegistrationDeactivationStore(SQLBaseStore): + @cachedInlineCallbacks() + def get_user_deactivated_status(self, user_id): + """Retrieve the value for the `deactivated` property for the provided user. + + Args: + user_id (str): The ID of the user to retrieve the status for. + + Returns: + defer.Deferred(bool): The requested value. 
+ """ + + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user_id}, + retcol="deactivated", + desc="get_user_deactivated_status", + ) + + # Convert the integer into a boolean. + return res == 1 + + @defer.inlineCallbacks + def set_user_deactivated_status(self, user_id, deactivated): + """Set the `deactivated` property for the provided user to the provided value. + + Args: + user_id (str): The ID of the user to set the status for. + deactivated (bool): The value to set for `deactivated`. + """ + + yield self.runInteraction( + "set_user_deactivated_status", + self.set_user_deactivated_status_txn, + user_id, + deactivated, + ) + + def set_user_deactivated_status_txn(self, txn, user_id, deactivated): + self._simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,) + ) + + +class RegistrationWorkerStore(RegistrationDeactivationStore): def __init__(self, db_conn, hs): super(RegistrationWorkerStore, self).__init__(db_conn, hs) @@ -673,27 +723,6 @@ class RegistrationWorkerStore(SQLBaseStore): desc="get_id_servers_user_bound", ) - @cachedInlineCallbacks() - def get_user_deactivated_status(self, user_id): - """Retrieve the value for the `deactivated` property for the provided user. - - Args: - user_id (str): The ID of the user to retrieve the status for. - - Returns: - defer.Deferred(bool): The requested value. - """ - - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user_id}, - retcol="deactivated", - desc="get_user_deactivated_status", - ) - - # Convert the integer into a boolean. - return res == 1 - def get_threepid_validation_session( self, medium, client_secret, address=None, sid=None, validated=True ): @@ -787,13 +816,14 @@ class RegistrationWorkerStore(SQLBaseStore): ) -class RegistrationStore( - RegistrationWorkerStore, background_updates.BackgroundUpdateStore +class RegistrationBackgroundUpdateStore( + RegistrationDeactivationStore, background_updates.BackgroundUpdateStore ): def __init__(self, db_conn, hs): - super(RegistrationStore, self).__init__(db_conn, hs) + super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs) self.clock = hs.get_clock() + self.config = hs.config self.register_background_index_update( "access_tokens_device_index", @@ -809,8 +839,6 @@ class RegistrationStore( columns=["creation_ts"], ) - self._account_validity = hs.config.account_validity - # we no longer use refresh tokens, but it's possible that some people # might have a background update queued to build this index. Just # clear the background update. 
@@ -824,17 +852,6 @@ class RegistrationStore( "users_set_deactivated_flag", self._background_update_set_deactivated_flag ) - # Create a background job for culling expired 3PID validity tokens - def start_cull(): - # run as a background process to make sure that the database transactions - # have a logcontext to report to - return run_as_background_process( - "cull_expired_threepid_validation_tokens", - self.cull_expired_threepid_validation_tokens, - ) - - hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) - @defer.inlineCallbacks def _background_update_set_deactivated_flag(self, progress, batch_size): """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1 @@ -896,6 +913,54 @@ class RegistrationStore( return nb_processed + @defer.inlineCallbacks + def _bg_user_threepids_grandfather(self, progress, batch_size): + """We now track which identity servers a user binds their 3PID to, so + we need to handle the case of existing bindings where we didn't track + this. + + We do this by grandfathering in existing user threepids assuming that + they used one of the server configured trusted identity servers. + """ + id_servers = set(self.config.trusted_third_party_id_servers) + + def _bg_user_threepids_grandfather_txn(txn): + sql = """ + INSERT INTO user_threepid_id_server + (user_id, medium, address, id_server) + SELECT user_id, medium, address, ? + FROM user_threepids + """ + + txn.executemany(sql, [(id_server,) for id_server in id_servers]) + + if id_servers: + yield self.runInteraction( + "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn + ) + + yield self._end_background_update("user_threepids_grandfather") + + return 1 + + +class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(RegistrationStore, self).__init__(db_conn, hs) + + self._account_validity = hs.config.account_validity + + # Create a background job for culling expired 3PID validity tokens + def start_cull(): + # run as a background process to make sure that the database transactions + # have a logcontext to report to + return run_as_background_process( + "cull_expired_threepid_validation_tokens", + self.cull_expired_threepid_validation_tokens, + ) + + hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) + @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms): """Adds an access token for the given user. @@ -1244,36 +1309,6 @@ class RegistrationStore( desc="get_users_pending_deactivation", ) - @defer.inlineCallbacks - def _bg_user_threepids_grandfather(self, progress, batch_size): - """We now track which identity servers a user binds their 3PID to, so - we need to handle the case of existing bindings where we didn't track - this. - - We do this by grandfathering in existing user threepids assuming that - they used one of the server configured trusted identity servers. - """ - id_servers = set(self.config.trusted_third_party_id_servers) - - def _bg_user_threepids_grandfather_txn(txn): - sql = """ - INSERT INTO user_threepid_id_server - (user_id, medium, address, id_server) - SELECT user_id, medium, address, ? 
- FROM user_threepids - """ - - txn.executemany(sql, [(id_server,) for id_server in id_servers]) - - if id_servers: - yield self.runInteraction( - "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn - ) - - yield self._end_background_update("user_threepids_grandfather") - - return 1 - def validate_threepid_session(self, session_id, client_secret, token, current_ts): """Attempt to validate a threepid session using a token @@ -1464,30 +1499,3 @@ class RegistrationStore( cull_expired_threepid_validation_tokens_txn, self.clock.time_msec(), ) - - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): - self._simple_update_one_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - updatevalues={"deactivated": 1 if deactivated else 0}, - ) - self._invalidate_cache_and_stream( - txn, self.get_user_deactivated_status, (user_id,) - ) - - @defer.inlineCallbacks - def set_user_deactivated_status(self, user_id, deactivated): - """Set the `deactivated` property for the provided user to the provided value. - - Args: - user_id (str): The ID of the user to set the status for. - deactivated (bool): The value to set for `deactivated`. - """ - - yield self.runInteraction( - "set_user_deactivated_status", - self.set_user_deactivated_status_txn, - user_id, - deactivated, - ) -- cgit 1.4.1 From 841054ad96b44161d7a990bc1349ecc70fcd736c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:47:42 +0100 Subject: Move search's bg updates to a dedicated store --- synapse/storage/search.py | 56 ++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index df87ab6a6d..9a41e78002 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -36,7 +36,7 @@ SearchEntry = namedtuple( ) -class SearchStore(BackgroundUpdateStore): +class SearchBackgroundUpdateStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" @@ -44,7 +44,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): - super(SearchStore, self).__init__(db_conn, hs) + super(SearchBackgroundUpdateStore, self).__init__(db_conn, hs) if not hs.config.enable_search: return @@ -289,29 +289,6 @@ class SearchStore(BackgroundUpdateStore): return num_rows - def store_event_search_txn(self, txn, event, key, value): - """Add event to the search table - - Args: - txn (cursor): - event (EventBase): - key (str): - value (str): - """ - self.store_search_entries_txn( - txn, - ( - SearchEntry( - key=key, - value=value, - event_id=event.event_id, - room_id=event.room_id, - stream_ordering=event.internal_metadata.stream_ordering, - origin_server_ts=event.origin_server_ts, - ), - ), - ) - def store_search_entries_txn(self, txn, entries): """Add entries to the search table @@ -358,6 +335,35 @@ class SearchStore(BackgroundUpdateStore): # This should be unreachable. 
raise Exception("Unrecognized database engine") + +class SearchStore(SearchBackgroundUpdateStore): + + def __init__(self, db_conn, hs): + super(SearchStore, self).__init__(db_conn, hs) + + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + self.store_search_entries_txn( + txn, + ( + SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ), + ), + ) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. -- cgit 1.4.1 From cfccd2d78a0b8338f33a5631d8f0637d4ed07e7e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:56:16 +0100 Subject: Move state's bg updates to a dedicated store --- synapse/storage/state.py | 394 ++++++++++++++++++++++++----------------------- 1 file changed, 204 insertions(+), 190 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1980a87108..71b533c006 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -353,8 +353,158 @@ class StateFilter(object): return member_filter, non_member_filter +class StateGroupBackgroundUpdateStore(SQLBaseStore): + """Defines functions related to state groups needed to run the state backgroud + updates. + """ + + def _count_state_group_hops_txn(self, txn, state_group): + """Given a state group, count how many hops there are in the tree. + + This is used to ensure the delta chains don't get too long. + """ + if isinstance(self.database_engine, PostgresEngine): + sql = """ + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT count(*) FROM state; + """ + + txn.execute(sql, (state_group,)) + row = txn.fetchone() + if row and row[0]: + return row[0] + else: + return 0 + else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + next_group = state_group + count = 0 + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + count += 1 + + return count + + def _get_state_groups_from_groups_txn( + self, txn, groups, state_filter=StateFilter.all() + ): + results = {group: {} for group in groups} + + where_clause, where_args = state_filter.make_sql_filter_clause() + + # Unless the filter clause is empty, we're going to append it after an + # existing where clause + if where_clause: + where_clause = " AND (%s)" % (where_clause,) + + if isinstance(self.database_engine, PostgresEngine): + # Temporarily disable sequential scans in this transaction. This is + # a temporary hack until we can add the right indices in + txn.execute("SET LOCAL enable_seqscan=off") + + # The below query walks the state_group tree so that the "state" + # table includes all state_groups in the tree. It then joins + # against `state_groups_state` to fetch the latest state. + # It assumes that previous state groups are always numerically + # lesser. + # The PARTITION is used to get the event_id in the greatest state + # group for the given type, state_key. 
+ # This may return multiple rows per (type, state_key), but last_value + # should be the same. + sql = """ + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT DISTINCT type, state_key, last_value(event_id) OVER ( + PARTITION BY type, state_key ORDER BY state_group ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS event_id FROM state_groups_state + WHERE state_group IN ( + SELECT state_group FROM state + ) + """ + + for group in groups: + args = [group] + args.extend(where_args) + + txn.execute(sql + where_clause, args) + for row in txn: + typ, state_key, event_id = row + key = (typ, state_key) + results[group][key] = event_id + else: + max_entries_returned = state_filter.max_entries_returned() + + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + for group in groups: + next_group = group + + while next_group: + # We did this before by getting the list of group ids, and + # then passing that list to sqlite to get latest event for + # each (type, state_key). However, that was terribly slow + # without the right indices (which we can't add until + # after we finish deduping state, which requires this func) + args = [next_group] + args.extend(where_args) + + txn.execute( + "SELECT type, state_key, event_id FROM state_groups_state" + " WHERE state_group = ? " + where_clause, + args, + ) + results[group].update( + ((typ, state_key), event_id) + for typ, state_key, event_id in txn + if (typ, state_key) not in results[group] + ) + + # If the number of entries in the (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + max_entries_returned is not None + and len(results[group]) == max_entries_returned + ): + break + + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + + return results + + # this inherits from EventsWorkerStore because it calls self.get_events -class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): +class StateGroupWorkerStore( + EventsWorkerStore, StateGroupBackgroundUpdateStore, SQLBaseStore +): """The parts of StateGroupStore that can be called from workers. """ @@ -694,107 +844,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return results - def _get_state_groups_from_groups_txn( - self, txn, groups, state_filter=StateFilter.all() - ): - results = {group: {} for group in groups} - - where_clause, where_args = state_filter.make_sql_filter_clause() - - # Unless the filter clause is empty, we're going to append it after an - # existing where clause - if where_clause: - where_clause = " AND (%s)" % (where_clause,) - - if isinstance(self.database_engine, PostgresEngine): - # Temporarily disable sequential scans in this transaction. This is - # a temporary hack until we can add the right indices in - txn.execute("SET LOCAL enable_seqscan=off") - - # The below query walks the state_group tree so that the "state" - # table includes all state_groups in the tree. 
It then joins - # against `state_groups_state` to fetch the latest state. - # It assumes that previous state groups are always numerically - # lesser. - # The PARTITION is used to get the event_id in the greatest state - # group for the given type, state_key. - # This may return multiple rows per (type, state_key), but last_value - # should be the same. - sql = """ - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT DISTINCT type, state_key, last_value(event_id) OVER ( - PARTITION BY type, state_key ORDER BY state_group ASC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) AS event_id FROM state_groups_state - WHERE state_group IN ( - SELECT state_group FROM state - ) - """ - - for group in groups: - args = [group] - args.extend(where_args) - - txn.execute(sql + where_clause, args) - for row in txn: - typ, state_key, event_id = row - key = (typ, state_key) - results[group][key] = event_id - else: - max_entries_returned = state_filter.max_entries_returned() - - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - for group in groups: - next_group = group - - while next_group: - # We did this before by getting the list of group ids, and - # then passing that list to sqlite to get latest event for - # each (type, state_key). However, that was terribly slow - # without the right indices (which we can't add until - # after we finish deduping state, which requires this func) - args = [next_group] - args.extend(where_args) - - txn.execute( - "SELECT type, state_key, event_id FROM state_groups_state" - " WHERE state_group = ? " + where_clause, - args, - ) - results[group].update( - ((typ, state_key), event_id) - for typ, state_key, event_id in txn - if (typ, state_key) not in results[group] - ) - - # If the number of entries in the (type,state_key)->event_id dict - # matches the number of (type,state_keys) types we were searching - # for, then we must have found them all, so no need to go walk - # further down the tree... UNLESS our types filter contained - # wildcards (i.e. Nones) in which case we have to do an exhaustive - # search - if ( - max_entries_returned is not None - and len(results[group]) == max_entries_returned - ): - break - - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - - return results - @defer.inlineCallbacks def get_state_for_events(self, event_ids, state_filter=StateFilter.all()): """Given a list of event_ids and type tuples, return a list of state @@ -1238,66 +1287,8 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return self.runInteraction("store_state_group", _store_state_group_txn) - def _count_state_group_hops_txn(self, txn, state_group): - """Given a state group, count how many hops there are in the tree. - - This is used to ensure the delta chains don't get too long. 
- """ - if isinstance(self.database_engine, PostgresEngine): - sql = """ - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT count(*) FROM state; - """ - - txn.execute(sql, (state_group,)) - row = txn.fetchone() - if row and row[0]: - return row[0] - else: - return 0 - else: - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - next_group = state_group - count = 0 - - while next_group: - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - if next_group: - count += 1 - - return count - -class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): - """ Keeps track of the state at a given event. - - This is done by the concept of `state groups`. Every event is a assigned - a state group (identified by an arbitrary string), which references a - collection of state events. The current state of an event is then the - collection of state events referenced by the event's state group. - - Hence, every change in the current state causes a new state group to be - generated. However, if no change happens (e.g., if we get a message event - with only one parent it inherits the state group from its parent.) - - There are three tables: - * `state_groups`: Stores group name, first event with in the group and - room id. - * `event_to_state_groups`: Maps events to state groups. - * `state_groups_state`: Maps state group to state events. - """ +class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore, BackgroundUpdateStore): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" @@ -1305,7 +1296,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" def __init__(self, db_conn, hs): - super(StateStore, self).__init__(db_conn, hs) + super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_update_handler( self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, self._background_deduplicate_state, @@ -1327,34 +1318,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): columns=["state_group"], ) - def _store_event_state_mappings_txn(self, txn, events_and_contexts): - state_groups = {} - for event, context in events_and_contexts: - if event.internal_metadata.is_outlier(): - continue - - # if the event was rejected, just give it the same state as its - # predecessor. 
- if context.rejected: - state_groups[event.event_id] = context.prev_group - continue - - state_groups[event.event_id] = context.state_group - - self._simple_insert_many_txn( - txn, - table="event_to_state_groups", - values=[ - {"state_group": state_group_id, "event_id": event_id} - for event_id, state_group_id in iteritems(state_groups) - ], - ) - - for event_id, state_group_id in iteritems(state_groups): - txn.call_after( - self._get_state_group_for_event.prefill, (event_id,), state_group_id - ) - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding @@ -1527,3 +1490,54 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): yield self._end_background_update(self.STATE_GROUP_INDEX_UPDATE_NAME) return 1 + + +class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore): + """ Keeps track of the state at a given event. + + This is done by the concept of `state groups`. Every event is a assigned + a state group (identified by an arbitrary string), which references a + collection of state events. The current state of an event is then the + collection of state events referenced by the event's state group. + + Hence, every change in the current state causes a new state group to be + generated. However, if no change happens (e.g., if we get a message event + with only one parent it inherits the state group from its parent.) + + There are three tables: + * `state_groups`: Stores group name, first event with in the group and + room id. + * `event_to_state_groups`: Maps events to state groups. + * `state_groups_state`: Maps state group to state events. + """ + + def __init__(self, db_conn, hs): + super(StateStore, self).__init__(db_conn, hs) + + def _store_event_state_mappings_txn(self, txn, events_and_contexts): + state_groups = {} + for event, context in events_and_contexts: + if event.internal_metadata.is_outlier(): + continue + + # if the event was rejected, just give it the same state as its + # predecessor. + if context.rejected: + state_groups[event.event_id] = context.prev_group + continue + + state_groups[event.event_id] = context.state_group + + self._simple_insert_many_txn( + txn, + table="event_to_state_groups", + values=[ + {"state_group": state_group_id, "event_id": event_id} + for event_id, state_group_id in iteritems(state_groups) + ], + ) + + for event_id, state_group_id in iteritems(state_groups): + txn.call_after( + self._get_state_group_for_event.prefill, (event_id,), state_group_id + ) -- cgit 1.4.1 From e106a0e4db23fa1fedcce1c169985a8c91181c86 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 18:19:08 +0100 Subject: Move user_directory's bg updates to a dedicated store --- synapse/storage/user_directory.py | 178 ++++++++++++++++++++------------------ 1 file changed, 94 insertions(+), 84 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index b5188d9bee..1b1e4751b9 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -32,14 +32,14 @@ logger = logging.getLogger(__name__) TEMP_TABLE = "_temp_populate_user_directory" -class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): +class UserDirectoryBackgroundUpdateStore(StateDeltasStore, BackgroundUpdateStore): # How many records do we calculate before sending it to # add_users_who_share_private_rooms? 
SHARE_PRIVATE_WORKING_SET = 500 def __init__(self, db_conn, hs): - super(UserDirectoryStore, self).__init__(db_conn, hs) + super(UserDirectoryBackgroundUpdateStore, self).__init__(db_conn, hs) self.server_name = hs.hostname @@ -452,55 +452,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - def remove_from_user_dir(self, user_id): - def _remove_from_user_dir_txn(txn): - self._simple_delete_txn( - txn, table="user_directory", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, table="user_directory_search", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, - table="users_who_share_private_rooms", - keyvalues={"user_id": user_id}, - ) - self._simple_delete_txn( - txn, - table="users_who_share_private_rooms", - keyvalues={"other_user_id": user_id}, - ) - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - - return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) - - @defer.inlineCallbacks - def get_users_in_dir_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - user_ids_share_pub = yield self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_share_priv = yield self._simple_select_onecol( - table="users_who_share_private_rooms", - keyvalues={"room_id": room_id}, - retcol="other_user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids = set(user_ids_share_pub) - user_ids.update(user_ids_share_priv) - - return user_ids - def add_users_who_share_private_room(self, room_id, user_id_tuples): """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. @@ -551,6 +502,98 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) + def delete_all_from_user_dir(self): + """Delete the entire user directory + """ + + def _delete_all_from_user_dir_txn(txn): + txn.execute("DELETE FROM user_directory") + txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_public_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") + txn.call_after(self.get_user_in_directory.invalidate_all) + + return self.runInteraction( + "delete_all_from_user_dir", _delete_all_from_user_dir_txn + ) + + @cached() + def get_user_in_directory(self, user_id): + return self._simple_select_one( + table="user_directory", + keyvalues={"user_id": user_id}, + retcols=("display_name", "avatar_url"), + allow_none=True, + desc="get_user_in_directory", + ) + + def update_user_directory_stream_pos(self, stream_id): + return self._simple_update_one( + table="user_directory_stream_pos", + keyvalues={}, + updatevalues={"stream_id": stream_id}, + desc="update_user_directory_stream_pos", + ) + + +class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): + + # How many records do we calculate before sending it to + # add_users_who_share_private_rooms? 
+ SHARE_PRIVATE_WORKING_SET = 500 + + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + def remove_from_user_dir(self, user_id): + def _remove_from_user_dir_txn(txn): + self._simple_delete_txn( + txn, table="user_directory", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, table="user_directory_search", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, + ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) + + return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) + + @defer.inlineCallbacks + def get_users_in_dir_due_to_room(self, room_id): + """Get all user_ids that are in the room directory because they're + in the given room_id + """ + user_ids_share_pub = yield self._simple_select_onecol( + table="users_in_public_rooms", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", + keyvalues={"room_id": room_id}, + retcol="other_user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) + + return user_ids + def remove_user_who_share_room(self, user_id, room_id): """ Deletes entries in the users_who_share_*_rooms table. The first @@ -637,31 +680,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): return [room_id for room_id, in rows] - def delete_all_from_user_dir(self): - """Delete the entire user directory - """ - - def _delete_all_from_user_dir_txn(txn): - txn.execute("DELETE FROM user_directory") - txn.execute("DELETE FROM user_directory_search") - txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_private_rooms") - txn.call_after(self.get_user_in_directory.invalidate_all) - - return self.runInteraction( - "delete_all_from_user_dir", _delete_all_from_user_dir_txn - ) - - @cached() - def get_user_in_directory(self, user_id): - return self._simple_select_one( - table="user_directory", - keyvalues={"user_id": user_id}, - retcols=("display_name", "avatar_url"), - allow_none=True, - desc="get_user_in_directory", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -670,14 +688,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): desc="get_user_directory_stream_pos", ) - def update_user_directory_stream_pos(self, stream_id): - return self._simple_update_one( - table="user_directory_stream_pos", - keyvalues={}, - updatevalues={"stream_id": stream_id}, - desc="update_user_directory_stream_pos", - ) - @defer.inlineCallbacks def search_user_dir(self, user_id, search_term, limit): """Searches for users in directory -- cgit 1.4.1 From 0496eafbf4277523430114cf965a254241b290e7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 19:00:55 +0100 Subject: Move roommember's bg updates to a dedicated store --- synapse/storage/roommember.py | 222 ++++++++++++++++++++++-------------------- 1 file changed, 114 insertions(+), 108 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/roommember.py 
b/synapse/storage/roommember.py index 59a89fad60..4e606a8380 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -27,6 +27,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import LoggingTransaction +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import Sqlite3Engine from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id @@ -820,9 +821,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return set(room_ids) -class RoomMemberStore(RoomMemberWorkerStore): +class RoomMemberBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(RoomMemberStore, self).__init__(db_conn, hs) + super(RoomMemberBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_update_handler( _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile ) @@ -838,112 +839,6 @@ class RoomMemberStore(RoomMemberWorkerStore): where_clause="forgotten = 1", ) - def _store_room_members_txn(self, txn, events, backfilled): - """Store a room member in the database. - """ - self._simple_insert_many_txn( - txn, - table="room_memberships", - values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": event.content.get("displayname", None), - "avatar_url": event.content.get("avatar_url", None), - } - for event in events - ], - ) - - for event in events: - txn.call_after( - self._membership_stream_cache.entity_has_changed, - event.state_key, - event.internal_metadata.stream_ordering, - ) - txn.call_after( - self.get_invited_rooms_for_user.invalidate, (event.state_key,) - ) - - # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. If the event is an - # outlier it is only current if its an "out of band membership", - # like a remote invite or a rejection of a remote invite. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_out_of_band_membership() - ) - is_mine = self.hs.is_mine_id(event.state_key) - if is_new_state and is_mine: - if event.membership == Membership.INVITE: - self._simple_insert_txn( - txn, - table="local_invites", - values={ - "event_id": event.event_id, - "invitee": event.state_key, - "inviter": event.sender, - "room_id": event.room_id, - "stream_id": event.internal_metadata.stream_ordering, - }, - ) - else: - sql = ( - "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - txn.execute( - sql, - ( - event.internal_metadata.stream_ordering, - event.event_id, - event.room_id, - event.state_key, - ), - ) - - @defer.inlineCallbacks - def locally_reject_invite(self, user_id, room_id): - sql = ( - "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" - " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - def f(txn, stream_ordering): - txn.execute(sql, (stream_ordering, True, room_id, user_id)) - - with self._stream_id_gen.get_next() as stream_ordering: - yield self.runInteraction("locally_reject_invite", f, stream_ordering) - - def forget(self, user_id, room_id): - """Indicate that user_id wishes to discard history for room_id.""" - - def f(txn): - sql = ( - "UPDATE" - " room_memberships" - " SET" - " forgotten = 1" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - ) - txn.execute(sql, (user_id, room_id)) - - self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.get_forgotten_rooms_for_user, (user_id,) - ) - - return self.runInteraction("forget_membership", f) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( @@ -1078,6 +973,117 @@ class RoomMemberStore(RoomMemberWorkerStore): return row_count +class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(RoomMemberStore, self).__init__(db_conn, hs) + + def _store_room_members_txn(self, txn, events, backfilled): + """Store a room member in the database. + """ + self._simple_insert_many_txn( + txn, + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + "display_name": event.content.get("displayname", None), + "avatar_url": event.content.get("avatar_url", None), + } + for event in events + ], + ) + + for event in events: + txn.call_after( + self._membership_stream_cache.entity_has_changed, + event.state_key, + event.internal_metadata.stream_ordering, + ) + txn.call_after( + self.get_invited_rooms_for_user.invalidate, (event.state_key,) + ) + + # We update the local_invites table only if the event is "current", + # i.e., its something that has just happened. If the event is an + # outlier it is only current if its an "out of band membership", + # like a remote invite or a rejection of a remote invite. + is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_out_of_band_membership() + ) + is_mine = self.hs.is_mine_id(event.state_key) + if is_new_state and is_mine: + if event.membership == Membership.INVITE: + self._simple_insert_txn( + txn, + table="local_invites", + values={ + "event_id": event.event_id, + "invitee": event.state_key, + "inviter": event.sender, + "room_id": event.room_id, + "stream_id": event.internal_metadata.stream_ordering, + }, + ) + else: + sql = ( + "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + txn.execute( + sql, + ( + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.state_key, + ), + ) + + @defer.inlineCallbacks + def locally_reject_invite(self, user_id, room_id): + sql = ( + "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" + " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + def f(txn, stream_ordering): + txn.execute(sql, (stream_ordering, True, room_id, user_id)) + + with self._stream_id_gen.get_next() as stream_ordering: + yield self.runInteraction("locally_reject_invite", f, stream_ordering) + + def forget(self, user_id, room_id): + """Indicate that user_id wishes to discard history for room_id.""" + + def f(txn): + sql = ( + "UPDATE" + " room_memberships" + " SET" + " forgotten = 1" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + ) + txn.execute(sql, (user_id, room_id)) + + self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.get_forgotten_rooms_for_user, (user_id,) + ) + + return self.runInteraction("forget_membership", f) + + class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates via state deltas. -- cgit 1.4.1 From 66ebea17235d9d3988d56cd1355656bbb508b3be Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 18:23:05 +0100 Subject: Lint --- synapse/storage/media_repository.py | 1 - synapse/storage/search.py | 1 - synapse/storage/state.py | 4 +++- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 2eb2740d07..84b5f3ad5e 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -16,7 +16,6 @@ from synapse.storage.background_updates import BackgroundUpdateStore class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore): - def __init__(self, db_conn, hs): super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 9a41e78002..6ba4190f1a 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -337,7 +337,6 @@ class SearchBackgroundUpdateStore(BackgroundUpdateStore): class SearchStore(SearchBackgroundUpdateStore): - def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 71b533c006..a941a5ae3f 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -1288,7 +1288,9 @@ class StateGroupWorkerStore( return self.runInteraction("store_state_group", _store_state_group_txn) -class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore, BackgroundUpdateStore): +class StateBackgroundUpdateStore( + StateGroupBackgroundUpdateStore, BackgroundUpdateStore +): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" -- cgit 1.4.1 From b94a401852a5b6d87455285ea050c4e0731dd6ab Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 8 Oct 2019 09:35:37 +0100 Subject: Fix /federation/v1/state for recent room versions (#6170) * Fix /federation/v1/state for recent room versions Turns out this endpoint was completely broken for v3 rooms. Hopefully this re-signing code is irrelevant nowadays anyway. 
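
For context, the re-signing block removed below called is_mine_id() on the event ID, which only makes sense while event IDs carry a server name; room version 3 switched to hash-based event IDs with no domain part, so a domain-based ownership check on them cannot work. A rough, self-contained illustration of that failure mode (invented helper name and values, not Synapse's actual implementation):

    def looks_like_mine(identifier, server_name="example.com"):
        # Old-style IDs ("$abc123:example.com") carry a domain after the
        # first colon; room v3+ event IDs are bare hashes with no domain.
        parts = identifier.split(":", 1)
        if len(parts) != 2:
            raise ValueError("ID has no domain part: %r" % (identifier,))
        return parts[1] == server_name

    looks_like_mine("$old_style_event_id:example.com")  # True for a v1/v2-style ID
    # looks_like_mine("$buRzCEkCLgALmtLQqnCndAbnrWyFEmBYiV2yyN5bPLo")
    # would raise ValueError: a v3 hash ID has no domain to compare against
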
--- changelog.d/6170.bugfix | 1 + synapse/federation/federation_server.py | 13 ------------- 2 files changed, 1 insertion(+), 13 deletions(-) create mode 100644 changelog.d/6170.bugfix (limited to 'synapse') diff --git a/changelog.d/6170.bugfix b/changelog.d/6170.bugfix new file mode 100644 index 0000000000..52f7ea233c --- /dev/null +++ b/changelog.d/6170.bugfix @@ -0,0 +1 @@ +Fix /federation/v1/state endpoint for recent room versions. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index da06ab379d..21e52c9695 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -36,7 +36,6 @@ from synapse.api.errors import ( UnsupportedRoomVersionError, ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS -from synapse.crypto.event_signing import compute_event_signature from synapse.events import room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.federation.persistence import TransactionActions @@ -322,18 +321,6 @@ class FederationServer(FederationBase): pdus = yield self.handler.get_state_for_pdu(room_id, event_id) auth_chain = yield self.store.get_auth_chain([pdu.event_id for pdu in pdus]) - for event in auth_chain: - # We sign these again because there was a bug where we - # incorrectly signed things the first time round - if self.hs.is_mine_id(event.event_id): - event.signatures.update( - compute_event_signature( - event.get_pdu_json(), - self.hs.hostname, - self.hs.config.signing_key[0], - ) - ) - return { "pdus": [pdu.get_pdu_json() for pdu in pdus], "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain], -- cgit 1.4.1 From ea7d938bca2d5fa0d6a54412ecdf036c5a3fc3a7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 8 Oct 2019 13:51:25 +0100 Subject: Remove unused public room list timeout param (#6179) * Remove unused public room list timeout param * Add changelog --- changelog.d/6179.misc | 1 + synapse/handlers/room_list.py | 13 +------------ 2 files changed, 2 insertions(+), 12 deletions(-) create mode 100644 changelog.d/6179.misc (limited to 'synapse') diff --git a/changelog.d/6179.misc b/changelog.d/6179.misc new file mode 100644 index 0000000000..01c4e71ea3 --- /dev/null +++ b/changelog.d/6179.misc @@ -0,0 +1 @@ +Remove unused `timeout` parameter from `_get_public_room_list`. \ No newline at end of file diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index cfed344d4d..c615206df1 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -88,16 +88,8 @@ class RoomListHandler(BaseHandler): # appservice specific lists. logger.info("Bypassing cache as search request.") - # XXX: Quick hack to stop room directory queries taking too long. - # Timeout request after 60s. Probably want a more fundamental - # solution at some point - timeout = self.clock.time() + 60 return self._get_public_room_list( - limit, - since_token, - search_filter, - network_tuple=network_tuple, - timeout=timeout, + limit, since_token, search_filter, network_tuple=network_tuple ) key = (limit, since_token, network_tuple) @@ -118,7 +110,6 @@ class RoomListHandler(BaseHandler): search_filter=None, network_tuple=EMPTY_THIRD_PARTY_ID, from_federation=False, - timeout=None, ): """Generate a public room list. Args: @@ -131,8 +122,6 @@ class RoomListHandler(BaseHandler): Setting to None returns all public rooms across all lists. 
from_federation (bool): Whether this request originated from a federating server or a client. Used for room filtering. - timeout (int|None): Amount of seconds to wait for a response before - timing out. TODO """ # Pagination tokens work by storing the room ID sent in the last batch, -- cgit 1.4.1 From 474abf1eb6852ca488fbf86d3da0622a457efef1 Mon Sep 17 00:00:00 2001 From: Anshul Angaria Date: Tue, 8 Oct 2019 18:25:16 +0530 Subject: add M_TOO_LARGE error code for uploading a too large file (#6151) Fixes #6109 --- changelog.d/6109.bugfix | 1 + synapse/rest/media/v1/upload_resource.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6109.bugfix (limited to 'synapse') diff --git a/changelog.d/6109.bugfix b/changelog.d/6109.bugfix new file mode 100644 index 0000000000..da7ac1be4e --- /dev/null +++ b/changelog.d/6109.bugfix @@ -0,0 +1 @@ +Fix bug when uploading a large file: Synapse responds with `M_UNKNOWN` while it should be `M_TOO_LARGE` according to spec. Contributed by Anshul Angaria. diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 5d76bbdf68..83d005812d 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -17,7 +17,7 @@ import logging from twisted.web.server import NOT_DONE_YET -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError from synapse.http.server import ( DirectServeResource, respond_with_json, @@ -56,7 +56,11 @@ class UploadResource(DirectServeResource): if content_length is None: raise SynapseError(msg="Request must specify a Content-Length", code=400) if int(content_length) > self.max_upload_size: - raise SynapseError(msg="Upload request body is too large", code=413) + raise SynapseError( + msg="Upload request body is too large", + code=413, + errcode=Codes.TOO_LARGE, + ) upload_name = parse_string(request, b"filename", encoding=None) if upload_name: -- cgit 1.4.1 From 8f1b385accbf8be15c35b6f06b18eb6d998544e4 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 8 Oct 2019 14:36:33 +0100 Subject: Don't end up with 4 classes in registration --- synapse/storage/registration.py | 102 ++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 52 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 1f6c93b73b..524b5eeaba 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -37,57 +37,7 @@ THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 logger = logging.getLogger(__name__) -class RegistrationDeactivationStore(SQLBaseStore): - @cachedInlineCallbacks() - def get_user_deactivated_status(self, user_id): - """Retrieve the value for the `deactivated` property for the provided user. - - Args: - user_id (str): The ID of the user to retrieve the status for. - - Returns: - defer.Deferred(bool): The requested value. - """ - - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user_id}, - retcol="deactivated", - desc="get_user_deactivated_status", - ) - - # Convert the integer into a boolean. - return res == 1 - - @defer.inlineCallbacks - def set_user_deactivated_status(self, user_id, deactivated): - """Set the `deactivated` property for the provided user to the provided value. - - Args: - user_id (str): The ID of the user to set the status for. - deactivated (bool): The value to set for `deactivated`. 
- """ - - yield self.runInteraction( - "set_user_deactivated_status", - self.set_user_deactivated_status_txn, - user_id, - deactivated, - ) - - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): - self._simple_update_one_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - updatevalues={"deactivated": 1 if deactivated else 0}, - ) - self._invalidate_cache_and_stream( - txn, self.get_user_deactivated_status, (user_id,) - ) - - -class RegistrationWorkerStore(RegistrationDeactivationStore): +class RegistrationWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): super(RegistrationWorkerStore, self).__init__(db_conn, hs) @@ -723,6 +673,27 @@ class RegistrationWorkerStore(RegistrationDeactivationStore): desc="get_id_servers_user_bound", ) + @cachedInlineCallbacks() + def get_user_deactivated_status(self, user_id): + """Retrieve the value for the `deactivated` property for the provided user. + + Args: + user_id (str): The ID of the user to retrieve the status for. + + Returns: + defer.Deferred(bool): The requested value. + """ + + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user_id}, + retcol="deactivated", + desc="get_user_deactivated_status", + ) + + # Convert the integer into a boolean. + return res == 1 + def get_threepid_validation_session( self, medium, client_secret, address=None, sid=None, validated=True ): @@ -817,7 +788,7 @@ class RegistrationWorkerStore(RegistrationDeactivationStore): class RegistrationBackgroundUpdateStore( - RegistrationDeactivationStore, background_updates.BackgroundUpdateStore + RegistrationWorkerStore, background_updates.BackgroundUpdateStore ): def __init__(self, db_conn, hs): super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs) @@ -1499,3 +1470,30 @@ class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateSto cull_expired_threepid_validation_tokens_txn, self.clock.time_msec(), ) + + @defer.inlineCallbacks + def set_user_deactivated_status(self, user_id, deactivated): + """Set the `deactivated` property for the provided user to the provided value. + + Args: + user_id (str): The ID of the user to set the status for. + deactivated (bool): The value to set for `deactivated`. 
+ """ + + yield self.runInteraction( + "set_user_deactivated_status", + self.set_user_deactivated_status_txn, + user_id, + deactivated, + ) + + def set_user_deactivated_status_txn(self, txn, user_id, deactivated): + self._simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,) + ) -- cgit 1.4.1 From b1c0a4ceb3c2c5ca51f0b32efcd58d8493fd9b99 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 8 Oct 2019 14:38:14 +0100 Subject: Cleanup client_ips --- synapse/storage/client_ips.py | 7 ------- 1 file changed, 7 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 1d89b50f57..067820a5da 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -35,15 +35,8 @@ LAST_SEEN_GRANULARITY = 120 * 1000 class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): - - self.client_ip_last_seen = Cache( - name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR - ) - super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs) - self.user_ips_max_age = hs.config.user_ips_max_age - self.register_background_index_update( "user_ips_device_index", index_name="user_ips_device_id", -- cgit 1.4.1 From c69324ffb588f72786c37b864d510abd279e47a2 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 8 Oct 2019 14:48:33 +0100 Subject: Fix RegistrationStore --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 524b5eeaba..6c5b29288a 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -915,7 +915,7 @@ class RegistrationBackgroundUpdateStore( return 1 -class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateStore): +class RegistrationStore(RegistrationBackgroundUpdateStore): def __init__(self, db_conn, hs): super(RegistrationStore, self).__init__(db_conn, hs) -- cgit 1.4.1 From ced4784592ab6b9080ec1d6c7aa2664a42b3a38e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Oct 2019 14:31:43 +0100 Subject: Fix inserting bytes as text --- synapse/storage/events.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2e485c8644..bb6ff0595a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -23,7 +23,7 @@ from functools import wraps from six import iteritems, text_type from six.moves import range -from canonicaljson import encode_canonical_json, json +from canonicaljson import json from prometheus_client import Counter, Histogram from twisted.internet import defer @@ -1632,9 +1632,7 @@ class EventsStore( and original_event.internal_metadata.is_redacted() ): # Redaction was allowed - pruned_json = encode_canonical_json( - prune_event_dict(original_event.get_dict()) - ) + pruned_json = encode_json(prune_event_dict(original_event.get_dict())) else: # Redaction wasn't allowed pruned_json = None -- cgit 1.4.1 From e7631d84e62e05b0dd0087b1b32b1f5f3ac521c5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Oct 2019 15:09:05 +0100 Subject: Fix existing hex encoded json values in DB --- .../56/redaction_censor3_fix_update.sql.postgres | 26 ++++++++++++++++++++++ 1 
file changed, 26 insertions(+) create mode 100644 synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres (limited to 'synapse') diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres new file mode 100644 index 0000000000..f7bcc5e2f2 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres @@ -0,0 +1,26 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- There was a bug where we may have updated censored redactions as bytes, +-- which can (somehow) cause json to be inserted hex encoded. This goes and +-- undoes any such hex encoded JSON. +UPDATE event_json SET json = convert_from(json::bytea, 'utf8') +WHERE event_id IN ( + SELECT event_json.event_id + FROM event_json + INNER JOIN redactions ON (event_json.event_id = redacts) + WHERE have_censored AND json NOT LIKE '{%' +); -- cgit 1.4.1 From 1d3858371e9577faf3382d1feee97154e5085cd4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Oct 2019 16:21:17 +0100 Subject: Disable bytes usage with postgres More often than not passing bytes to `txn.execute` is a bug (where we meant to pass a string) that just happens to work if `BYTEA_OUTPUT` is set to `ESCAPE`. However, this is a bit of a footgun so we want to instead error when this happens, and force using `bytearray` if we actually want to use bytes. --- synapse/storage/engines/postgres.py | 7 +++++++ synapse/storage/filtering.py | 4 ++-- synapse/storage/pusher.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 601617b21e..d670286fa5 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -22,6 +22,13 @@ class PostgresEngine(object): def __init__(self, database_module, database_config): self.module = database_module self.module.extensions.register_type(self.module.extensions.UNICODE) + + # Disables passing `bytes` to txn.execute, c.f. #6186. If you do + # actually want to use bytes than wrap it in `bytearray`. + def _disable_bytes_adapter(_): + raise Exception("Passing bytes to DB is disabled.") + + self.module.extensions.register_adapter(bytes, _disable_bytes_adapter) self.synchronous_commit = database_config.get("synchronous_commit", True) self._version = None # unknown as yet diff --git a/synapse/storage/filtering.py b/synapse/storage/filtering.py index 23b48f6cea..7c2a7da836 100644 --- a/synapse/storage/filtering.py +++ b/synapse/storage/filtering.py @@ -51,7 +51,7 @@ class FilteringStore(SQLBaseStore): "SELECT filter_id FROM user_filters " "WHERE user_id = ? AND filter_json = ?" 
) - txn.execute(sql, (user_localpart, def_json)) + txn.execute(sql, (user_localpart, bytearray(def_json))) filter_id_response = txn.fetchone() if filter_id_response is not None: return filter_id_response[0] @@ -68,7 +68,7 @@ class FilteringStore(SQLBaseStore): "INSERT INTO user_filters (user_id, filter_id, filter_json)" "VALUES(?, ?, ?)" ) - txn.execute(sql, (user_localpart, filter_id, def_json)) + txn.execute(sql, (user_localpart, filter_id, bytearray(def_json))) return filter_id diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 3e0e834a62..b12e80440a 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -241,7 +241,7 @@ class PusherStore(PusherWorkerStore): "device_display_name": device_display_name, "ts": pushkey_ts, "lang": lang, - "data": encode_canonical_json(data), + "data": bytearray(encode_canonical_json(data)), "last_stream_ordering": last_stream_ordering, "profile_tag": profile_tag, "id": stream_id, -- cgit 1.4.1 From 7f18b3d5262746d4095c747f6b80899445f0aa2d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Oct 2019 16:03:24 +0100 Subject: Do the update as a background index --- synapse/storage/events_bg_updates.py | 43 ++++++++++++++++++++++ .../56/redaction_censor3_fix_update.sql.postgres | 17 ++++----- 2 files changed, 51 insertions(+), 9 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 5717baf48c..e77a7e28af 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -71,6 +71,19 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "redactions_received_ts", self._redactions_received_ts ) + # This index gets deleted in `event_fix_redactions_bytes` update + self.register_background_index_update( + "event_fix_redactions_bytes_create_index", + index_name="redactions_censored_redacts", + table="redactions", + columns=["redacts"], + where_clause="have_censored", + ) + + self.register_background_update_handler( + "event_fix_redactions_bytes", self._event_fix_redactions_bytes + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -458,3 +471,33 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): yield self._end_background_update("redactions_received_ts") return count + + @defer.inlineCallbacks + def _event_fix_redactions_bytes(self, progress, batch_size): + """Undoes hex encoded censored redacted event JSON. + """ + + def _event_fix_redactions_bytes_txn(txn): + # This update is quite fast due to new index. 
+ txn.execute( + """ + UPDATE event_json + SET + json = convert_from(json::bytea, 'utf8') + FROM redactions + WHERE + redactions.have_censored + AND event_json.event_id = redactions.redacts + AND json NOT LIKE '{%'; + """ + ) + + txn.execute("DROP INDEX redactions_censored_redacts") + + yield self.runInteraction( + "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn + ) + + yield self._end_background_update("event_fix_redactions_bytes") + + return 1 diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres index f7bcc5e2f2..67471f3ef5 100644 --- a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres +++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres @@ -15,12 +15,11 @@ -- There was a bug where we may have updated censored redactions as bytes, --- which can (somehow) cause json to be inserted hex encoded. This goes and --- undoes any such hex encoded JSON. -UPDATE event_json SET json = convert_from(json::bytea, 'utf8') -WHERE event_id IN ( - SELECT event_json.event_id - FROM event_json - INNER JOIN redactions ON (event_json.event_id = redacts) - WHERE have_censored AND json NOT LIKE '{%' -); +-- which can (somehow) cause json to be inserted hex encoded. These updates go +-- and undoes any such hex encoded JSON. + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_fix_redactions_bytes_create_index', '{}'); + +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index'); -- cgit 1.4.1 From 4535a07f4af0854eed0cfd171e4032bdd3f39cbb Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 9 Oct 2019 17:54:03 -0400 Subject: make version optional in body of e2e backup version update to agree with latest version of the MSC --- synapse/handlers/e2e_room_keys.py | 4 +-- synapse/rest/client/v2_alpha/room_keys.py | 2 +- tests/handlers/test_e2e_room_keys.py | 47 ++++++++++++++++++++----------- 3 files changed, 34 insertions(+), 19 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index a9d80f708c..0cea445f0d 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -352,8 +352,8 @@ class E2eRoomKeysHandler(object): A deferred of an empty dict. 
""" if "version" not in version_info: - raise SynapseError(400, "Missing version in body", Codes.MISSING_PARAM) - if version_info["version"] != version: + version_info["version"] = version + elif version_info["version"] != version: raise SynapseError( 400, "Version in body does not match", Codes.INVALID_PARAM ) diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/v2_alpha/room_keys.py index df4f44cd36..d596786430 100644 --- a/synapse/rest/client/v2_alpha/room_keys.py +++ b/synapse/rest/client/v2_alpha/room_keys.py @@ -375,7 +375,7 @@ class RoomKeysVersionServlet(RestServlet): "ed25519:something": "hijklmnop" } }, - "version": "42" + "version": "12345" } HTTP/1.1 200 OK diff --git a/tests/handlers/test_e2e_room_keys.py b/tests/handlers/test_e2e_room_keys.py index c4503c1611..c700a2fad1 100644 --- a/tests/handlers/test_e2e_room_keys.py +++ b/tests/handlers/test_e2e_room_keys.py @@ -187,9 +187,8 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase): self.assertEqual(res, 404) @defer.inlineCallbacks - def test_update_bad_version(self): - """Check that we get a 400 if the version in the body is missing or - doesn't match + def test_update_missing_version(self): + """Check that the update succeeds if the version is missing from the body """ version = yield self.handler.create_version( self.local_user, @@ -197,19 +196,35 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase): ) self.assertEqual(version, "1") - res = None - try: - yield self.handler.update_version( - self.local_user, - version, - { - "algorithm": "m.megolm_backup.v1", - "auth_data": "revised_first_version_auth_data", - }, - ) - except errors.SynapseError as e: - res = e.code - self.assertEqual(res, 400) + yield self.handler.update_version( + self.local_user, + version, + { + "algorithm": "m.megolm_backup.v1", + "auth_data": "revised_first_version_auth_data", + }, + ) + + # check we can retrieve it as the current version + res = yield self.handler.get_version_info(self.local_user) + self.assertDictEqual( + res, + { + "algorithm": "m.megolm_backup.v1", + "auth_data": "revised_first_version_auth_data", + "version": version, + }, + ) + + @defer.inlineCallbacks + def test_update_bad_version(self): + """Check that we get a 400 if the version in the body doesn't match + """ + version = yield self.handler.create_version( + self.local_user, + {"algorithm": "m.megolm_backup.v1", "auth_data": "first_version_auth_data"}, + ) + self.assertEqual(version, "1") res = None try: -- cgit 1.4.1 From f743108a94658eb1dbaf168d39874272f756a386 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 10 Oct 2019 09:39:35 +0100 Subject: Refactor HomeserverConfig so it can be typechecked (#6137) --- changelog.d/6137.misc | 1 + mypy.ini | 16 ++- synapse/config/_base.py | 191 +++++++++++++++++++++++------- synapse/config/_base.pyi | 135 +++++++++++++++++++++ synapse/config/api.py | 2 + synapse/config/appservice.py | 2 + synapse/config/captcha.py | 2 + synapse/config/cas.py | 2 + synapse/config/consent_config.py | 3 + synapse/config/database.py | 2 + synapse/config/emailconfig.py | 2 + synapse/config/groups.py | 2 + synapse/config/homeserver.py | 68 +++++------ synapse/config/jwt_config.py | 2 + synapse/config/key.py | 2 + synapse/config/logger.py | 2 + synapse/config/metrics.py | 2 + synapse/config/password.py | 2 + synapse/config/password_auth_providers.py | 2 + synapse/config/push.py | 2 + synapse/config/ratelimiting.py | 2 + synapse/config/registration.py | 4 + synapse/config/repository.py | 2 + synapse/config/room_directory.py | 2 + 
synapse/config/saml2_config.py | 2 + synapse/config/server.py | 2 + synapse/config/server_notices_config.py | 2 + synapse/config/spam_checker.py | 2 + synapse/config/stats.py | 2 + synapse/config/third_party_event_rules.py | 2 + synapse/config/tls.py | 9 +- synapse/config/tracer.py | 2 + synapse/config/user_directory.py | 2 + synapse/config/voip.py | 2 + synapse/config/workers.py | 2 + tests/config/test_tls.py | 25 ++-- tox.ini | 3 +- 37 files changed, 415 insertions(+), 94 deletions(-) create mode 100644 changelog.d/6137.misc create mode 100644 synapse/config/_base.pyi (limited to 'synapse') diff --git a/changelog.d/6137.misc b/changelog.d/6137.misc new file mode 100644 index 0000000000..92a02e71c3 --- /dev/null +++ b/changelog.d/6137.misc @@ -0,0 +1 @@ +Refactor configuration loading to allow better typechecking. diff --git a/mypy.ini b/mypy.ini index 8788574ee3..ffadaddc0b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,10 +4,6 @@ plugins=mypy_zope:plugin follow_imports=skip mypy_path=stubs -[mypy-synapse.config.homeserver] -# this is a mess because of the metaclass shenanigans -ignore_errors = True - [mypy-zope] ignore_missing_imports = True @@ -52,3 +48,15 @@ ignore_missing_imports = True [mypy-signedjson.*] ignore_missing_imports = True + +[mypy-prometheus_client.*] +ignore_missing_imports = True + +[mypy-service_identity.*] +ignore_missing_imports = True + +[mypy-daemonize] +ignore_missing_imports = True + +[mypy-sentry_sdk] +ignore_missing_imports = True diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 31f6530978..08619404bb 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -18,7 +18,9 @@ import argparse import errno import os +from collections import OrderedDict from textwrap import dedent +from typing import Any, MutableMapping, Optional from six import integer_types @@ -51,7 +53,56 @@ Missing mandatory `server_name` config option. """ +def path_exists(file_path): + """Check if a file exists + + Unlike os.path.exists, this throws an exception if there is an error + checking if the file exists (for example, if there is a perms error on + the parent dir). + + Returns: + bool: True if the file exists; False if not. + """ + try: + os.stat(file_path) + return True + except OSError as e: + if e.errno != errno.ENOENT: + raise e + return False + + class Config(object): + """ + A configuration section, containing configuration keys and values. + + Attributes: + section (str): The section title of this config object, such as + "tls" or "logger". This is used to refer to it on the root + logger (for example, `config.tls.some_option`). Must be + defined in subclasses. + """ + + section = None + + def __init__(self, root_config=None): + self.root = root_config + + def __getattr__(self, item: str) -> Any: + """ + Try and fetch a configuration option that does not exist on this class. + + This is so that existing configs that rely on `self.value`, where value + is actually from a different config section, continue to work. 
+ """ + if item in ["generate_config_section", "read_config"]: + raise AttributeError(item) + + if self.root is None: + raise AttributeError(item) + else: + return self.root._get_unclassed_config(self.section, item) + @staticmethod def parse_size(value): if isinstance(value, integer_types): @@ -88,22 +139,7 @@ class Config(object): @classmethod def path_exists(cls, file_path): - """Check if a file exists - - Unlike os.path.exists, this throws an exception if there is an error - checking if the file exists (for example, if there is a perms error on - the parent dir). - - Returns: - bool: True if the file exists; False if not. - """ - try: - os.stat(file_path) - return True - except OSError as e: - if e.errno != errno.ENOENT: - raise e - return False + return path_exists(file_path) @classmethod def check_file(cls, file_path, config_name): @@ -136,42 +172,106 @@ class Config(object): with open(file_path) as file_stream: return file_stream.read() - def invoke_all(self, name, *args, **kargs): - """Invoke all instance methods with the given name and arguments in the - class's MRO. + +class RootConfig(object): + """ + Holder of an application's configuration. + + What configuration this object holds is defined by `config_classes`, a list + of Config classes that will be instantiated and given the contents of a + configuration file to read. They can then be accessed on this class by their + section name, defined in the Config or dynamically set to be the name of the + class, lower-cased and with "Config" removed. + """ + + config_classes = [] + + def __init__(self): + self._configs = OrderedDict() + + for config_class in self.config_classes: + if config_class.section is None: + raise ValueError("%r requires a section name" % (config_class,)) + + try: + conf = config_class(self) + except Exception as e: + raise Exception("Failed making %s: %r" % (config_class.section, e)) + self._configs[config_class.section] = conf + + def __getattr__(self, item: str) -> Any: + """ + Redirect lookups on this object either to config objects, or values on + config objects, so that `config.tls.blah` works, as well as legacy uses + of things like `config.server_name`. It will first look up the config + section name, and then values on those config classes. + """ + if item in self._configs.keys(): + return self._configs[item] + + return self._get_unclassed_config(None, item) + + def _get_unclassed_config(self, asking_section: Optional[str], item: str): + """ + Fetch a config value from one of the instantiated config classes that + has not been fetched directly. + + Args: + asking_section: If this check is coming from a Config child, which + one? This section will not be asked if it has the value. + item: The configuration value key. + + Raises: + AttributeError if no config classes have the config key. The body + will contain what sections were checked. + """ + for key, val in self._configs.items(): + if key == asking_section: + continue + + if item in dir(val): + return getattr(val, item) + + raise AttributeError(item, "not found in %s" % (list(self._configs.keys()),)) + + def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]: + """ + Invoke a function on all instantiated config objects this RootConfig is + configured to use. Args: - name (str): Name of function to invoke + func_name: Name of function to invoke *args **kwargs - Returns: - list: The list of the return values from each method called + ordered dictionary of config section name and the result of the + function from it. 
""" - results = [] - for cls in type(self).mro(): - if name in cls.__dict__: - results.append(getattr(cls, name)(self, *args, **kargs)) - return results + res = OrderedDict() + + for name, config in self._configs.items(): + if hasattr(config, func_name): + res[name] = getattr(config, func_name)(*args, **kwargs) + + return res @classmethod - def invoke_all_static(cls, name, *args, **kargs): - """Invoke all static methods with the given name and arguments in the - class's MRO. + def invoke_all_static(cls, func_name: str, *args, **kwargs): + """ + Invoke a static function on config objects this RootConfig is + configured to use. Args: - name (str): Name of function to invoke + func_name: Name of function to invoke *args **kwargs - Returns: - list: The list of the return values from each method called + ordered dictionary of config section name and the result of the + function from it. """ - results = [] - for c in cls.mro(): - if name in c.__dict__: - results.append(getattr(c, name)(*args, **kargs)) - return results + for config in cls.config_classes: + if hasattr(config, func_name): + getattr(config, func_name)(*args, **kwargs) def generate_config( self, @@ -187,7 +287,8 @@ class Config(object): tls_private_key_path=None, acme_domain=None, ): - """Build a default configuration file + """ + Build a default configuration file This is used when the user explicitly asks us to generate a config file (eg with --generate_config). @@ -242,6 +343,7 @@ class Config(object): Returns: str: the yaml config file """ + return "\n\n".join( dedent(conf) for conf in self.invoke_all( @@ -257,7 +359,7 @@ class Config(object): tls_certificate_path=tls_certificate_path, tls_private_key_path=tls_private_key_path, acme_domain=acme_domain, - ) + ).values() ) @classmethod @@ -444,7 +546,7 @@ class Config(object): ) (config_path,) = config_files - if not cls.path_exists(config_path): + if not path_exists(config_path): print("Generating config file %s" % (config_path,)) if config_args.data_directory: @@ -469,7 +571,7 @@ class Config(object): open_private_ports=config_args.open_private_ports, ) - if not cls.path_exists(config_dir_path): + if not path_exists(config_dir_path): os.makedirs(config_dir_path) with open(config_path, "w") as config_file: config_file.write("# vim:ft=yaml\n\n") @@ -518,7 +620,7 @@ class Config(object): return obj - def parse_config_dict(self, config_dict, config_dir_path, data_dir_path): + def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None): """Read the information from the config dict into this Config object. Args: @@ -607,3 +709,6 @@ def find_config_files(search_paths): else: config_files.append(config_path) return config_files + + +__all__ = ["Config", "RootConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi new file mode 100644 index 0000000000..86bc965ee4 --- /dev/null +++ b/synapse/config/_base.pyi @@ -0,0 +1,135 @@ +from typing import Any, List, Optional + +from synapse.config import ( + api, + appservice, + captcha, + cas, + consent_config, + database, + emailconfig, + groups, + jwt_config, + key, + logger, + metrics, + password, + password_auth_providers, + push, + ratelimiting, + registration, + repository, + room_directory, + saml2_config, + server, + server_notices_config, + spam_checker, + stats, + third_party_event_rules, + tls, + tracer, + user_directory, + voip, + workers, +) + +class ConfigError(Exception): ... 
+ +MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str +MISSING_REPORT_STATS_SPIEL: str +MISSING_SERVER_NAME: str + +def path_exists(file_path: str): ... + +class RootConfig: + server: server.ServerConfig + tls: tls.TlsConfig + database: database.DatabaseConfig + logging: logger.LoggingConfig + ratelimit: ratelimiting.RatelimitConfig + media: repository.ContentRepositoryConfig + captcha: captcha.CaptchaConfig + voip: voip.VoipConfig + registration: registration.RegistrationConfig + metrics: metrics.MetricsConfig + api: api.ApiConfig + appservice: appservice.AppServiceConfig + key: key.KeyConfig + saml2: saml2_config.SAML2Config + cas: cas.CasConfig + jwt: jwt_config.JWTConfig + password: password.PasswordConfig + email: emailconfig.EmailConfig + worker: workers.WorkerConfig + authproviders: password_auth_providers.PasswordAuthProviderConfig + push: push.PushConfig + spamchecker: spam_checker.SpamCheckerConfig + groups: groups.GroupsConfig + userdirectory: user_directory.UserDirectoryConfig + consent: consent_config.ConsentConfig + stats: stats.StatsConfig + servernotices: server_notices_config.ServerNoticesConfig + roomdirectory: room_directory.RoomDirectoryConfig + thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig + tracer: tracer.TracerConfig + + config_classes: List = ... + def __init__(self) -> None: ... + def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ... + @classmethod + def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ... + def __getattr__(self, item: str): ... + def parse_config_dict( + self, + config_dict: Any, + config_dir_path: Optional[Any] = ..., + data_dir_path: Optional[Any] = ..., + ) -> None: ... + read_config: Any = ... + def generate_config( + self, + config_dir_path: str, + data_dir_path: str, + server_name: str, + generate_secrets: bool = ..., + report_stats: Optional[str] = ..., + open_private_ports: bool = ..., + listeners: Optional[Any] = ..., + database_conf: Optional[Any] = ..., + tls_certificate_path: Optional[str] = ..., + tls_private_key_path: Optional[str] = ..., + acme_domain: Optional[str] = ..., + ): ... + @classmethod + def load_or_generate_config(cls, description: Any, argv: Any): ... + @classmethod + def load_config(cls, description: Any, argv: Any): ... + @classmethod + def add_arguments_to_parser(cls, config_parser: Any) -> None: ... + @classmethod + def load_config_with_parser(cls, parser: Any, argv: Any): ... + def generate_missing_files( + self, config_dict: dict, config_dir_path: str + ) -> None: ... + +class Config: + root: RootConfig + def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ... + def __getattr__(self, item: str, from_root: bool = ...): ... + @staticmethod + def parse_size(value: Any): ... + @staticmethod + def parse_duration(value: Any): ... + @staticmethod + def abspath(file_path: Optional[str]): ... + @classmethod + def path_exists(cls, file_path: str): ... + @classmethod + def check_file(cls, file_path: str, config_name: str): ... + @classmethod + def ensure_directory(cls, dir_path: str): ... + @classmethod + def read_file(cls, file_path: str, config_name: str): ... + +def read_config_files(config_files: List[str]): ... +def find_config_files(search_paths: List[str]): ... 
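
To illustrate how the refactored configuration classes above are intended to be used, here is a minimal sketch (not part of the patch): a Config subclass declares a `section` name, a RootConfig subclass lists it in `config_classes`, and its options become reachable both through the section and, for backwards compatibility, directly on the root object. `ExampleConfig` and `example_option` are hypothetical names chosen for illustration; only `Config`, `RootConfig`, `section`, `config_classes` and `parse_config_dict` come from the code above, and the sketch assumes synapse's config module is importable.

    from synapse.config._base import Config, RootConfig

    class ExampleConfig(Config):
        # Every config class must now declare which section it provides
        # (hypothetical section name, for illustration only).
        section = "example"

        def read_config(self, config, **kwargs):
            self.example_option = config.get("example_option", "default")

    class ExampleRootConfig(RootConfig):
        # RootConfig instantiates each listed class and exposes it by section name.
        config_classes = [ExampleConfig]

    root = ExampleRootConfig()
    root.parse_config_dict({"example_option": "hello"})

    print(root.example.example_option)  # section-qualified access: "hello"
    print(root.example_option)          # legacy flat access still works via __getattr__
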
diff --git a/synapse/config/api.py b/synapse/config/api.py index dddea79a8a..74cd53a8ed 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -18,6 +18,8 @@ from ._base import Config class ApiConfig(Config): + section = "api" + def read_config(self, config, **kwargs): self.room_invite_state_types = config.get( "room_invite_state_types", diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 28d36b1bc3..9b4682222d 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -30,6 +30,8 @@ logger = logging.getLogger(__name__) class AppServiceConfig(Config): + section = "appservice" + def read_config(self, config, **kwargs): self.app_service_config_files = config.get("app_service_config_files", []) self.notify_appservices = config.get("notify_appservices", True) diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index 8dac8152cf..44bd5c6799 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -16,6 +16,8 @@ from ._base import Config class CaptchaConfig(Config): + section = "captcha" + def read_config(self, config, **kwargs): self.recaptcha_private_key = config.get("recaptcha_private_key") self.recaptcha_public_key = config.get("recaptcha_public_key") diff --git a/synapse/config/cas.py b/synapse/config/cas.py index ebe34d933b..b916c3aa66 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py @@ -22,6 +22,8 @@ class CasConfig(Config): cas_server_url: URL of CAS server """ + section = "cas" + def read_config(self, config, **kwargs): cas_config = config.get("cas_config", None) if cas_config: diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 48976e17b1..62c4c44d60 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -73,6 +73,9 @@ DEFAULT_CONFIG = """\ class ConsentConfig(Config): + + section = "consent" + def __init__(self, *args): super(ConsentConfig, self).__init__(*args) diff --git a/synapse/config/database.py b/synapse/config/database.py index 118aafbd4a..0e2509f0b1 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -21,6 +21,8 @@ from ._base import Config class DatabaseConfig(Config): + section = "database" + def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index d9b43de660..658897a77e 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -28,6 +28,8 @@ from ._base import Config, ConfigError class EmailConfig(Config): + section = "email" + def read_config(self, config, **kwargs): # TODO: We should separate better the email configuration from the notification # and account validity config. diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 2a522b5f44..d6862d9a64 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -17,6 +17,8 @@ from ._base import Config class GroupsConfig(Config): + section = "groups" + def read_config(self, config, **kwargs): self.enable_group_creation = config.get("enable_group_creation", False) self.group_creation_prefix = config.get("group_creation_prefix", "") diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 72acad4f18..6e348671c7 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from ._base import RootConfig from .api import ApiConfig from .appservice import AppServiceConfig from .captcha import CaptchaConfig @@ -46,36 +47,37 @@ from .voip import VoipConfig from .workers import WorkerConfig -class HomeServerConfig( - ServerConfig, - TlsConfig, - DatabaseConfig, - LoggingConfig, - RatelimitConfig, - ContentRepositoryConfig, - CaptchaConfig, - VoipConfig, - RegistrationConfig, - MetricsConfig, - ApiConfig, - AppServiceConfig, - KeyConfig, - SAML2Config, - CasConfig, - JWTConfig, - PasswordConfig, - EmailConfig, - WorkerConfig, - PasswordAuthProviderConfig, - PushConfig, - SpamCheckerConfig, - GroupsConfig, - UserDirectoryConfig, - ConsentConfig, - StatsConfig, - ServerNoticesConfig, - RoomDirectoryConfig, - ThirdPartyRulesConfig, - TracerConfig, -): - pass +class HomeServerConfig(RootConfig): + + config_classes = [ + ServerConfig, + TlsConfig, + DatabaseConfig, + LoggingConfig, + RatelimitConfig, + ContentRepositoryConfig, + CaptchaConfig, + VoipConfig, + RegistrationConfig, + MetricsConfig, + ApiConfig, + AppServiceConfig, + KeyConfig, + SAML2Config, + CasConfig, + JWTConfig, + PasswordConfig, + EmailConfig, + WorkerConfig, + PasswordAuthProviderConfig, + PushConfig, + SpamCheckerConfig, + GroupsConfig, + UserDirectoryConfig, + ConsentConfig, + StatsConfig, + ServerNoticesConfig, + RoomDirectoryConfig, + ThirdPartyRulesConfig, + TracerConfig, + ] diff --git a/synapse/config/jwt_config.py b/synapse/config/jwt_config.py index 36d87cef03..a568726985 100644 --- a/synapse/config/jwt_config.py +++ b/synapse/config/jwt_config.py @@ -23,6 +23,8 @@ MISSING_JWT = """Missing jwt library. This is required for jwt login. class JWTConfig(Config): + section = "jwt" + def read_config(self, config, **kwargs): jwt_config = config.get("jwt_config", None) if jwt_config: diff --git a/synapse/config/key.py b/synapse/config/key.py index f039f96e9c..ec5d430afb 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -92,6 +92,8 @@ class TrustedKeyServer(object): class KeyConfig(Config): + section = "key" + def read_config(self, config, config_dir_path, **kwargs): # the signing key can be specified inline or in a separate file if "signing_key" in config: diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 767ecfdf09..d609ec111b 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -84,6 +84,8 @@ root: class LoggingConfig(Config): + section = "logging" + def read_config(self, config, **kwargs): self.log_config = self.abspath(config.get("log_config")) self.no_redirect_stdio = config.get("no_redirect_stdio", False) diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index ec35a6b868..282a43bddb 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -34,6 +34,8 @@ class MetricsFlags(object): class MetricsConfig(Config): + section = "metrics" + def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) diff --git a/synapse/config/password.py b/synapse/config/password.py index d5b5953f2f..2a634ac751 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -20,6 +20,8 @@ class PasswordConfig(Config): """Password login configuration """ + section = "password" + def read_config(self, config, **kwargs): password_config = config.get("password_config", {}) if password_config is None: diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index c50e244394..9746bbc681 
100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -23,6 +23,8 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider" class PasswordAuthProviderConfig(Config): + section = "authproviders" + def read_config(self, config, **kwargs): self.password_providers = [] # type: List[Any] providers = [] diff --git a/synapse/config/push.py b/synapse/config/push.py index 1b932722a5..0910958649 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -18,6 +18,8 @@ from ._base import Config class PushConfig(Config): + section = "push" + def read_config(self, config, **kwargs): push_config = config.get("push", {}) self.push_include_content = push_config.get("include_content", True) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 587e2862b7..947f653e03 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -36,6 +36,8 @@ class FederationRateLimitConfig(object): class RatelimitConfig(Config): + section = "ratelimiting" + def read_config(self, config, **kwargs): # Load the new-style messages config if it exists. Otherwise fall back diff --git a/synapse/config/registration.py b/synapse/config/registration.py index bef89e2bf4..b3e3e6dda2 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -24,6 +24,8 @@ from synapse.util.stringutils import random_string_with_symbols class AccountValidityConfig(Config): + section = "accountvalidity" + def __init__(self, config, synapse_config): self.enabled = config.get("enabled", False) self.renew_by_email_enabled = "renew_at" in config @@ -77,6 +79,8 @@ class AccountValidityConfig(Config): class RegistrationConfig(Config): + section = "registration" + def read_config(self, config, **kwargs): self.enable_registration = bool( strtobool(str(config.get("enable_registration", False))) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 14740891f3..d0205e14b9 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -78,6 +78,8 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): + section = "media" + def read_config(self, config, **kwargs): # Only enable the media repo if either the media repo is enabled or the diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index a92693017b..7c9f05bde4 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -19,6 +19,8 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): + section = "roomdirectory" + def read_config(self, config, **kwargs): self.enable_room_list_search = config.get("enable_room_list_search", True) diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index ab34b41ca8..c407e13680 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -55,6 +55,8 @@ def _dict_merge(merge_dict, into_dict): class SAML2Config(Config): + section = "saml2" + def read_config(self, config, **kwargs): self.saml2_enabled = False diff --git a/synapse/config/server.py b/synapse/config/server.py index 709bd387e5..afc4d6a4ab 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -58,6 +58,8 @@ on how to configure the new listener. 
class ServerConfig(Config): + section = "server" + def read_config(self, config, **kwargs): self.server_name = config["server_name"] self.server_context = config.get("server_context", None) diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 6d4285ef93..6ea2ea8869 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -59,6 +59,8 @@ class ServerNoticesConfig(Config): None if server notices are not enabled. """ + section = "servernotices" + def __init__(self, *args): super(ServerNoticesConfig, self).__init__(*args) self.server_notices_mxid = None diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index e40797ab50..36e0ddab5c 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -19,6 +19,8 @@ from ._base import Config class SpamCheckerConfig(Config): + section = "spamchecker" + def read_config(self, config, **kwargs): self.spam_checker = None diff --git a/synapse/config/stats.py b/synapse/config/stats.py index b18ddbd1fa..62485189ea 100644 --- a/synapse/config/stats.py +++ b/synapse/config/stats.py @@ -25,6 +25,8 @@ class StatsConfig(Config): Configuration for the behaviour of synapse's stats engine """ + section = "stats" + def read_config(self, config, **kwargs): self.stats_enabled = True self.stats_bucket_size = 86400 * 1000 diff --git a/synapse/config/third_party_event_rules.py b/synapse/config/third_party_event_rules.py index b3431441b9..10a99c792e 100644 --- a/synapse/config/third_party_event_rules.py +++ b/synapse/config/third_party_event_rules.py @@ -19,6 +19,8 @@ from ._base import Config class ThirdPartyRulesConfig(Config): + section = "thirdpartyrules" + def read_config(self, config, **kwargs): self.third_party_event_rules = None diff --git a/synapse/config/tls.py b/synapse/config/tls.py index fc47ba3e9a..f06341eb67 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -18,6 +18,7 @@ import os import warnings from datetime import datetime from hashlib import sha256 +from typing import List import six @@ -33,7 +34,9 @@ logger = logging.getLogger(__name__) class TlsConfig(Config): - def read_config(self, config, config_dir_path, **kwargs): + section = "tls" + + def read_config(self, config: dict, config_dir_path: str, **kwargs): acme_config = config.get("acme", None) if acme_config is None: @@ -57,7 +60,7 @@ class TlsConfig(Config): self.tls_certificate_file = self.abspath(config.get("tls_certificate_path")) self.tls_private_key_file = self.abspath(config.get("tls_private_key_path")) - if self.has_tls_listener(): + if self.root.server.has_tls_listener(): if not self.tls_certificate_file: raise ConfigError( "tls_certificate_path must be specified if TLS-enabled listeners are " @@ -108,7 +111,7 @@ class TlsConfig(Config): ) # Support globs (*) in whitelist values - self.federation_certificate_verification_whitelist = [] + self.federation_certificate_verification_whitelist = [] # type: List[str] for entry in fed_whitelist_entries: try: entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii")) diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 85d99a3166..8be1346113 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -19,6 +19,8 @@ from ._base import Config, ConfigError class TracerConfig(Config): + section = "tracing" + def read_config(self, config, **kwargs): opentracing_config = config.get("opentracing") if opentracing_config is None: diff --git 
a/synapse/config/user_directory.py b/synapse/config/user_directory.py index f6313e17d4..c8d19c5d6b 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -21,6 +21,8 @@ class UserDirectoryConfig(Config): Configuration for the behaviour of the /user_directory API """ + section = "userdirectory" + def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 2ca0e1cf70..a68a3068aa 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -16,6 +16,8 @@ from ._base import Config class VoipConfig(Config): + section = "voip" + def read_config(self, config, **kwargs): self.turn_uris = config.get("turn_uris", []) self.turn_shared_secret = config.get("turn_shared_secret") diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 1ec4998625..fef72ed974 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -21,6 +21,8 @@ class WorkerConfig(Config): They have their own pid_file and listener configuration. They use the replication_url to talk to the main synapse process.""" + section = "worker" + def read_config(self, config, **kwargs): self.worker_app = config.get("worker_app") diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index b02780772a..1be6ff563b 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -21,17 +21,24 @@ import yaml from OpenSSL import SSL +from synapse.config._base import Config, RootConfig from synapse.config.tls import ConfigError, TlsConfig from synapse.crypto.context_factory import ClientTLSOptionsFactory from tests.unittest import TestCase -class TestConfig(TlsConfig): +class FakeServer(Config): + section = "server" + def has_tls_listener(self): return False +class TestConfig(RootConfig): + config_classes = [FakeServer, TlsConfig] + + class TLSConfigTests(TestCase): def test_warn_self_signed(self): """ @@ -202,13 +209,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= conf = TestConfig() conf.read_config( yaml.safe_load( - TestConfig().generate_config_section( + TestConfig().generate_config( "/config_dir_path", "my_super_secure_server", "/data_dir_path", - "/tls_cert_path", - "tls_private_key", - None, # This is the acme_domain + tls_certificate_path="/tls_cert_path", + tls_private_key_path="tls_private_key", + acme_domain=None, # This is the acme_domain ) ), "/config_dir_path", @@ -223,13 +230,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= conf = TestConfig() conf.read_config( yaml.safe_load( - TestConfig().generate_config_section( + TestConfig().generate_config( "/config_dir_path", "my_super_secure_server", "/data_dir_path", - "/tls_cert_path", - "tls_private_key", - "my_supe_secure_server", # This is the acme_domain + tls_certificate_path="/tls_cert_path", + tls_private_key_path="tls_private_key", + acme_domain="my_supe_secure_server", # This is the acme_domain ) ), "/config_dir_path", diff --git a/tox.ini b/tox.ini index 1bce10a4ce..367cc2ccf2 100644 --- a/tox.ini +++ b/tox.ini @@ -163,10 +163,9 @@ deps = {[base]deps} mypy mypy-zope - typeshed env = MYPYPATH = stubs/ extras = all -commands = mypy --show-traceback \ +commands = mypy --show-traceback --check-untyped-defs --show-error-codes --follow-imports=normal \ synapse/logging/ \ synapse/config/ -- cgit 1.4.1 From da815c1f695ceca56643d7814c96f7a3cfa3c70a Mon Sep 17 00:00:00 2001 From: Andrew Morgan 
<1342360+anoadragon453@users.noreply.github.com> Date: Thu, 10 Oct 2019 10:06:45 +0100 Subject: Move tag/push rules room upgrade checking ealier (#6155) It turns out that _local_membership_update doesn't run when you join a new, remote room. It only runs if you're joining a room that your server already knows about. This would explain #4703 and #5295 and why the transfer would work in testing and some rooms, but not others. This would especially hit single-user homeservers. The check has been moved to right after the room has been joined, and works much more reliably. (Though it may still be a bit awkward of a place). --- changelog.d/6155.bugfix | 1 + synapse/handlers/room_member.py | 62 +++++++++++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 18 deletions(-) create mode 100644 changelog.d/6155.bugfix (limited to 'synapse') diff --git a/changelog.d/6155.bugfix b/changelog.d/6155.bugfix new file mode 100644 index 0000000000..e32c0dce09 --- /dev/null +++ b/changelog.d/6155.bugfix @@ -0,0 +1 @@ +Fix transferring notifications and tags when joining an upgraded room that is new to your server. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 95a244d86c..380e2fad5e 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -203,23 +203,11 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: - yield self._user_joined_room(target, room_id) - - # Copy over direct message status and room tags if this is a join - # on an upgraded room - - # Check if this is an upgraded room - predecessor = yield self.store.get_room_predecessor(room_id) - - if predecessor: - # It is an upgraded room. Copy over old tags - self.copy_room_tags_and_direct_to_room( - predecessor["room_id"], room_id, user_id - ) - # Copy over push rules - yield self.store.copy_push_rules_from_room_to_room_for_user( - predecessor["room_id"], room_id, user_id + # Copy over user state if we're joining an upgraded room + yield self.copy_user_state_if_room_upgrade( + room_id, requester.user.to_string() ) + yield self._user_joined_room(target, room_id) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) @@ -463,10 +451,16 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - ret = yield self._remote_join( + remote_join_response = yield self._remote_join( requester, remote_room_hosts, room_id, target, content ) - return ret + + # Copy over user state if this is a join on an remote upgraded room + yield self.copy_user_state_if_room_upgrade( + room_id, requester.user.to_string() + ) + + return remote_join_response elif effective_membership_state == Membership.LEAVE: if not is_host_in_room: @@ -503,6 +497,38 @@ class RoomMemberHandler(object): ) return res + @defer.inlineCallbacks + def copy_user_state_if_room_upgrade(self, new_room_id, user_id): + """Copy user-specific information when they join a new room if that new room is the + result of a room upgrade + + Args: + new_room_id (str): The ID of the room the user is joining + user_id (str): The ID of the user + + Returns: + Deferred + """ + # Check if the new room is an upgraded room + predecessor = yield self.store.get_room_predecessor(new_room_id) + if not predecessor: + return + + logger.debug( + "Found predecessor for %s: %s. 
Copying over room tags and push " "rules", + new_room_id, + predecessor, + ) + + # It is an upgraded room. Copy over old tags + yield self.copy_room_tags_and_direct_to_room( + predecessor["room_id"], new_room_id, user_id + ) + # Copy over push rules + yield self.store.copy_push_rules_from_room_to_room_for_user( + predecessor["room_id"], new_room_id, user_id + ) + @defer.inlineCallbacks def send_membership_event(self, requester, event, context, ratelimit=True): """ -- cgit 1.4.1 From 1d6dd1c2944c22147258dda8ccf2777c68b38fba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 10:53:06 +0100 Subject: Move patch_inline_callbacks into synapse/ --- synapse/__init__.py | 2 +- synapse/util/patch_inline_callbacks.py | 179 +++++++++++++++++++++++++++++++++ tests/__init__.py | 4 +- tests/patch_inline_callbacks.py | 179 --------------------------------- 4 files changed, 182 insertions(+), 182 deletions(-) create mode 100644 synapse/util/patch_inline_callbacks.py delete mode 100644 tests/patch_inline_callbacks.py (limited to 'synapse') diff --git a/synapse/__init__.py b/synapse/__init__.py index 1055f54e00..bf102244a9 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -20,7 +20,7 @@ import os import sys -from tests.patch_inline_callbacks import do_patch +from synapse.util.patch_inline_callbacks import do_patch # Check that we're not running on an unsupported Python version. if sys.version_info < (3, 5): diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py new file mode 100644 index 0000000000..4fb49b0b2b --- /dev/null +++ b/synapse/util/patch_inline_callbacks.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import functools +import sys + +from twisted.internet import defer +from twisted.internet.defer import Deferred +from twisted.python.failure import Failure + + +def do_patch(): + """ + Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit + """ + + from synapse.logging.context import LoggingContext + + orig_inline_callbacks = defer.inlineCallbacks + if hasattr(orig_inline_callbacks, "patched_by_synapse"): + return + + def new_inline_callbacks(f): + @functools.wraps(f) + def wrapped(*args, **kwargs): + start_context = LoggingContext.current_context() + changes = [] + orig = orig_inline_callbacks(_check_yield_points(f, changes, start_context)) + + try: + res = orig(*args, **kwargs) + except Exception: + if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + + err = "%s changed context from %s to %s on exception" % ( + f, + start_context, + LoggingContext.current_context(), + ) + print(err, file=sys.stderr) + raise Exception(err) + raise + + if not isinstance(res, Deferred) or res.called: + if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + + err = "Completed %s changed context from %s to %s" % ( + f, + start_context, + LoggingContext.current_context(), + ) + # print the error to stderr because otherwise all we + # see in travis-ci is the 500 error + print(err, file=sys.stderr) + raise Exception(err) + return res + + if LoggingContext.current_context() != LoggingContext.sentinel: + err = ( + "%s returned incomplete deferred in non-sentinel context " + "%s (start was %s)" + ) % (f, LoggingContext.current_context(), start_context) + print(err, file=sys.stderr) + raise Exception(err) + + def check_ctx(r): + if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + err = "%s completion of %s changed context from %s to %s" % ( + "Failure" if isinstance(r, Failure) else "Success", + f, + start_context, + LoggingContext.current_context(), + ) + print(err, file=sys.stderr) + raise Exception(err) + return r + + res.addBoth(check_ctx) + return res + + return wrapped + + defer.inlineCallbacks = new_inline_callbacks + new_inline_callbacks.patched_by_synapse = True + + +def _check_yield_points(f, changes, start_context): + """Wraps a generator that is about to be passed to defer.inlineCallbacks + checking that after every yield the log contexts are correct. + """ + + from synapse.logging.context import LoggingContext + + @functools.wraps(f) + def check_yield_points_inner(*args, **kwargs): + expected_context = start_context + + gen = f(*args, **kwargs) + + last_yield_line_no = 1 + result = None + while True: + try: + isFailure = isinstance(result, Failure) + if isFailure: + d = result.throwExceptionIntoGenerator(gen) + else: + d = gen.send(result) + except (StopIteration, defer._DefGen_Return) as e: + if LoggingContext.current_context() != expected_context: + # This happens when the context is lost sometime *after* the + # final yield and returning. E.g. we forgot to yield on a + # function that returns a deferred. 
+ err = ( + "Function %r returned and changed context from %s to %s," + " in %s between %d and end of func" + % ( + f.__qualname__, + start_context, + LoggingContext.current_context(), + f.__code__.co_filename, + last_yield_line_no, + ) + ) + changes.append(err) + # raise Exception(err) + return getattr(e, "value", None) + + try: + result = yield d + except Exception as e: + result = Failure(e) + + frame = gen.gi_frame + + if LoggingContext.current_context() != expected_context: + # This happens because the context is lost sometime *after* the + # previous yield and *after* the current yield. E.g. the + # deferred we waited on didn't follow the rules, or we forgot to + # yield on a function between the two yield points. + err = ( + "%s changed context from %s to %s, happened between lines %d and %d in %s" + % ( + frame.f_code.co_name, + start_context, + LoggingContext.current_context(), + last_yield_line_no, + frame.f_lineno, + frame.f_code.co_filename, + ) + ) + changes.append(err) + # raise Exception(err) + + expected_context = LoggingContext.current_context() + + last_yield_line_no = frame.f_lineno + + return check_yield_points_inner diff --git a/tests/__init__.py b/tests/__init__.py index f7fc502f01..ed805db1c2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -16,9 +16,9 @@ from twisted.trial import util -import tests.patch_inline_callbacks +from synapse.util.patch_inline_callbacks import do_patch # attempt to do the patch before we load any synapse code -tests.patch_inline_callbacks.do_patch() +do_patch() util.DEFAULT_TIMEOUT_DURATION = 20 diff --git a/tests/patch_inline_callbacks.py b/tests/patch_inline_callbacks.py deleted file mode 100644 index a35a1d3305..0000000000 --- a/tests/patch_inline_callbacks.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import functools -import sys - -from twisted.internet import defer -from twisted.internet.defer import Deferred -from twisted.python.failure import Failure - - -def do_patch(): - """ - Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit - """ - - from synapse.logging.context import LoggingContext - - orig_inline_callbacks = defer.inlineCallbacks - if hasattr(orig_inline_callbacks, "patched_by_synapse"): - return - - def new_inline_callbacks(f): - @functools.wraps(f) - def wrapped(*args, **kwargs): - start_context = LoggingContext.current_context() - changes = [] - orig = orig_inline_callbacks(_check_yield_points(f, changes, start_context)) - - try: - res = orig(*args, **kwargs) - except Exception: - if LoggingContext.current_context() != start_context: - for err in changes: - print(err, file=sys.stderr) - - err = "%s changed context from %s to %s on exception" % ( - f, - start_context, - LoggingContext.current_context(), - ) - print(err, file=sys.stderr) - raise Exception(err) - raise - - if not isinstance(res, Deferred) or res.called: - if LoggingContext.current_context() != start_context: - for err in changes: - print(err, file=sys.stderr) - - err = "Completed %s changed context from %s to %s" % ( - f, - start_context, - LoggingContext.current_context(), - ) - # print the error to stderr because otherwise all we - # see in travis-ci is the 500 error - print(err, file=sys.stderr) - raise Exception(err) - return res - - if LoggingContext.current_context() != LoggingContext.sentinel: - err = ( - "%s returned incomplete deferred in non-sentinel context " - "%s (start was %s)" - ) % (f, LoggingContext.current_context(), start_context) - print(err, file=sys.stderr) - raise Exception(err) - - def check_ctx(r): - if LoggingContext.current_context() != start_context: - for err in changes: - print(err, file=sys.stderr) - err = "%s completion of %s changed context from %s to %s" % ( - "Failure" if isinstance(r, Failure) else "Success", - f, - start_context, - LoggingContext.current_context(), - ) - print(err, file=sys.stderr) - raise Exception(err) - return r - - res.addBoth(check_ctx) - return res - - return wrapped - - defer.inlineCallbacks = new_inline_callbacks - new_inline_callbacks.patched_by_synapse = True - - -def _check_yield_points(f, changes, start_context): - """Wraps a generator that is about to passed to defer.inlineCallbacks - checking that after every yield the log contexts are correct. - """ - - from synapse.logging.context import LoggingContext - - @functools.wraps(f) - def check_yield_points_inner(*args, **kwargs): - expected_context = start_context - - gen = f(*args, **kwargs) - - last_yield_line_no = 1 - result = None - while True: - try: - isFailure = isinstance(result, Failure) - if isFailure: - d = result.throwExceptionIntoGenerator(gen) - else: - d = gen.send(result) - except (StopIteration, defer._DefGen_Return) as e: - if LoggingContext.current_context() != expected_context: - # This happens when the context is lost sometime *after* the - # final yield and returning. E.g. we forgot to yield on a - # function that returns a deferred. 
- err = ( - "Function %r returned and changed context from %s to %s," - " in %s between %d and end of func" - % ( - f.__qualname__, - start_context, - LoggingContext.current_context(), - f.__code__.co_filename, - last_yield_line_no, - ) - ) - changes.append(err) - # raise Exception(err) - return getattr(e, "value", None) - - try: - result = yield d - except Exception as e: - result = Failure(e) - - frame = gen.gi_frame - - if LoggingContext.current_context() != expected_context: - # This happens because the context is lost sometime *after* the - # previous yield and *after* the current yield. E.g. the - # deferred we waited on didn't follow the rules, or we forgot to - # yield on a function between the two yield points. - err = ( - "%s changed context from %s to %s, happened between lines %d and %d in %s" - % ( - frame.f_code.co_name, - start_context, - LoggingContext.current_context(), - last_yield_line_no, - frame.f_lineno, - frame.f_code.co_filename, - ) - ) - changes.append(err) - # raise Exception(err) - - expected_context = LoggingContext.current_context() - - last_yield_line_no = frame.f_lineno - - return check_yield_points_inner -- cgit 1.4.1 From 3e4272961a4cb659513bccd981cbd42f4e506362 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 10:58:32 +0100 Subject: Test for sentinel commit --- synapse/util/patch_inline_callbacks.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 4fb49b0b2b..4a45824f52 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -146,13 +146,30 @@ def _check_yield_points(f, changes, start_context): # raise Exception(err) return getattr(e, "value", None) + frame = gen.gi_frame + + if isinstance(d, defer.Deferred): + # This happens if we yield on a deferred that doesn't follow + # the log context rules without wrappin in a `make_deferred_yieldable` + if LoggingContext.current_context() != LoggingContext.Sentinel: + err = ( + "%s yielded with context %s rather than Sentinel," + " yielded on line %d in %s" + % ( + frame.f_code.co_name, + start_context, + LoggingContext.current_context(), + frame.f_lineno, + frame.f_code.co_filename, + ) + ) + changes.append(err) + try: result = yield d except Exception as e: result = Failure(e) - frame = gen.gi_frame - if LoggingContext.current_context() != expected_context: # This happens because the context is lost sometime *after* the # previous yield and *after* the current yield. E.g. 
the -- cgit 1.4.1 From ec0596f2ab4502c9a6183813a7e5dc2a5bfedd48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 10:59:07 +0100 Subject: Log correct context --- synapse/util/patch_inline_callbacks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 4a45824f52..5ef7190b14 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -117,7 +117,7 @@ def _check_yield_points(f, changes, start_context): gen = f(*args, **kwargs) - last_yield_line_no = 1 + last_yield_line_no = gen.gi_frame.f_lineno result = None while True: try: @@ -136,7 +136,7 @@ def _check_yield_points(f, changes, start_context): " in %s between %d and end of func" % ( f.__qualname__, - start_context, + expected_context, LoggingContext.current_context(), f.__code__.co_filename, last_yield_line_no, @@ -148,22 +148,22 @@ def _check_yield_points(f, changes, start_context): frame = gen.gi_frame - if isinstance(d, defer.Deferred): + if isinstance(d, defer.Deferred) and not d.called: # This happens if we yield on a deferred that doesn't follow # the log context rules without wrappin in a `make_deferred_yieldable` - if LoggingContext.current_context() != LoggingContext.Sentinel: + if LoggingContext.current_context() is not LoggingContext.sentinel: err = ( - "%s yielded with context %s rather than Sentinel," + "%s yielded with context %s rather than sentinel," " yielded on line %d in %s" % ( frame.f_code.co_name, - start_context, LoggingContext.current_context(), frame.f_lineno, frame.f_code.co_filename, ) ) changes.append(err) + # raise Exception(err) try: result = yield d -- cgit 1.4.1 From 128d5948c4e0066f1263b347198f4754e72010c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 11:16:26 +0100 Subject: Fix packaging --- synapse/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/__init__.py b/synapse/__init__.py index bf102244a9..56df3f5ac6 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -20,8 +20,6 @@ import os import sys -from synapse.util.patch_inline_callbacks import do_patch - # Check that we're not running on an unsupported Python version. if sys.version_info < (3, 5): print("Synapse requires Python 3.5 or above.") @@ -41,4 +39,8 @@ except ImportError: __version__ = "1.4.0" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): + # We import here so that we don't have to install a bunch of deps when + # running the packaging tox test. + from synapse.util.patch_inline_callbacks import do_patch + do_patch() -- cgit 1.4.1 From 562b4e51dd0e7d4a6f776502b9ac357ed3428445 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 10 Oct 2019 11:28:23 +0100 Subject: Rewrite the user_filter migration again (#6184) you can't plausibly ALTER TABLE in sqlite, so we create the new table with the right schema to start with. --- changelog.d/6184.misc | 1 + .../schema/delta/56/unique_user_filter_index.py | 58 ++++++++++++---------- 2 files changed, 33 insertions(+), 26 deletions(-) create mode 100644 changelog.d/6184.misc (limited to 'synapse') diff --git a/changelog.d/6184.misc b/changelog.d/6184.misc new file mode 100644 index 0000000000..30b3e56082 --- /dev/null +++ b/changelog.d/6184.misc @@ -0,0 +1 @@ +Update `user_filters` table to have a unique index, and non-null columns. 
Thanks to @pik for contributing this. \ No newline at end of file diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py index 60031f23ca..1de8b54961 100644 --- a/synapse/storage/schema/delta/56/unique_user_filter_index.py +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -5,42 +5,48 @@ from synapse.storage.engines import PostgresEngine logger = logging.getLogger(__name__) +""" +This migration updates the user_filters table as follows: + + - drops any (user_id, filter_id) duplicates + - makes the columns NON-NULLable + - turns the index into a UNIQUE index +""" + + def run_upgrade(cur, database_engine, *args, **kwargs): + pass + + +def run_create(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): select_clause = """ - CREATE TEMPORARY TABLE user_filters_migration AS SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json - FROM user_filters; + FROM user_filters """ else: select_clause = """ - CREATE TEMPORARY TABLE user_filters_migration AS - SELECT * FROM user_filters GROUP BY user_id, filter_id; + SELECT * FROM user_filters GROUP BY user_id, filter_id """ - sql = ( - """ - BEGIN; - %s - DROP INDEX user_filters_by_user_id_filter_id; - DELETE FROM user_filters; - ALTER TABLE user_filters - ALTER COLUMN user_id SET NOT NULL, - ALTER COLUMN filter_id SET NOT NULL, - ALTER COLUMN filter_json SET NOT NULL; - INSERT INTO user_filters(user_id, filter_id, filter_json) - SELECT * FROM user_filters_migration; - DROP TABLE user_filters_migration; - CREATE UNIQUE INDEX user_filters_by_user_id_filter_id_unique - ON user_filters(user_id, filter_id); - END; - """ - % select_clause + sql = """ + DROP TABLE IF EXISTS user_filters_migration; + DROP INDEX IF EXISTS user_filters_unique; + CREATE TABLE user_filters_migration ( + user_id TEXT NOT NULL, + filter_id BIGINT NOT NULL, + filter_json BYTEA NOT NULL + ); + INSERT INTO user_filters_migration (user_id, filter_id, filter_json) + %s; + CREATE UNIQUE INDEX user_filters_unique ON user_filters_migration + (user_id, filter_id); + DROP TABLE user_filters; + ALTER TABLE user_filters_migration RENAME TO user_filters; + """ % ( + select_clause, ) + if isinstance(database_engine, PostgresEngine): cur.execute(sql) else: cur.executescript(sql) - - -def run_create(cur, database_engine, *args, **kwargs): - pass -- cgit 1.4.1 From a139420a3cfda6a4a4ee4750611b31dd71fc33f3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 10 Oct 2019 11:29:01 +0100 Subject: Fix races in room stats (and other) updates. (#6187) Hopefully this will fix the occasional failures we were seeing in the room directory. The problem was that events are not necessarily persisted (and `current_state_delta_stream` updated) in the same order as their stream_id. So for instance current_state_delta 9 might be persisted *before* current_state_delta 8. Then, when the room stats saw stream_id 9, it assumed it had done everything up to 9, and never came back to do stream_id 8. We can solve this easily by only processing up to the stream_id where we know all events have been persisted. 
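
In other words, the consumers are changed to read the persisted maximum stream ordering first, ask the store only for deltas up to that bound, and then advance their position to the bound the store actually covered. A condensed sketch of that loop follows; the `StateDeltaConsumer` class is hypothetical and the `store` argument is assumed to be a Synapse datastore exposing the two methods touched by this patch, so this is an illustration of the pattern rather than the real handler code (which is in the diff below).

    from twisted.internet import defer

    class StateDeltaConsumer(object):
        # Hypothetical consumer used only to illustrate the bounded catch-up loop.
        def __init__(self, store, initial_pos):
            self.store = store
            self.pos = initial_pos

        @defer.inlineCallbacks
        def catch_up(self):
            while True:
                # Read the upper bound *before* fetching deltas, so a delta whose
                # stream_id is lower than a later, already-persisted one cannot
                # be skipped.
                room_max = self.store.get_room_max_stream_ordering()
                if self.pos == room_max:
                    return

                # The store returns the position it has fully covered (possibly
                # clipped below room_max) together with the deltas up to it.
                max_pos, deltas = yield self.store.get_current_state_deltas(
                    self.pos, room_max
                )
                for delta in deltas:
                    pass  # handle each current_state_delta_stream row here

                # Advance to the covered position, not to the stream_id of the
                # last row we happened to see.
                self.pos = max_pos
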
--- changelog.d/6187.bugfix | 1 + synapse/handlers/presence.py | 16 ++++++++++++---- synapse/handlers/stats.py | 12 +++++++----- synapse/handlers/user_directory.py | 17 ++++++++++++----- synapse/storage/state_deltas.py | 38 +++++++++++++++++++++++++++++--------- tests/handlers/test_typing.py | 2 +- tests/rest/admin/test_admin.py | 2 +- 7 files changed, 63 insertions(+), 25 deletions(-) create mode 100644 changelog.d/6187.bugfix (limited to 'synapse') diff --git a/changelog.d/6187.bugfix b/changelog.d/6187.bugfix new file mode 100644 index 0000000000..6142c5b98d --- /dev/null +++ b/changelog.d/6187.bugfix @@ -0,0 +1 @@ +Fix occasional missed updates in the room and user directories. \ No newline at end of file diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 053cf66b28..2a5f1a007d 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -803,17 +803,25 @@ class PresenceHandler(object): # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "presence_delta"): - deltas = yield self.store.get_current_state_deltas(self._event_pos) - if not deltas: + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self._event_pos == room_max_stream_ordering: return + logger.debug( + "Processing presence stats %s->%s", + self._event_pos, + room_max_stream_ordering, + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self._event_pos, room_max_stream_ordering + ) yield self._handle_state_delta(deltas) - self._event_pos = deltas[-1]["stream_id"] + self._event_pos = max_pos # Expose current event processing position to prometheus synapse.metrics.event_processing_positions.labels("presence").set( - self._event_pos + max_pos ) @defer.inlineCallbacks diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index c62b113115..466daf9202 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -87,21 +87,23 @@ class StatsHandler(StateDeltasHandler): # Be sure to read the max stream_ordering *before* checking if there are any outstanding # deltas, since there is otherwise a chance that we could miss updates which arrive # after we check the deltas. - room_max_stream_ordering = yield self.store.get_room_max_stream_ordering() + room_max_stream_ordering = self.store.get_room_max_stream_ordering() if self.pos == room_max_stream_ordering: break - deltas = yield self.store.get_current_state_deltas(self.pos) + logger.debug( + "Processing room stats %s->%s", self.pos, room_max_stream_ordering + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self.pos, room_max_stream_ordering + ) if deltas: logger.debug("Handling %d state deltas", len(deltas)) room_deltas, user_deltas = yield self._handle_deltas(deltas) - - max_pos = deltas[-1]["stream_id"] else: room_deltas = {} user_deltas = {} - max_pos = room_max_stream_ordering # Then count deltas for total_events and total_event_bytes. 
room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes( diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index e53669e40d..624f05ab5b 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -138,21 +138,28 @@ class UserDirectoryHandler(StateDeltasHandler): # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "user_dir_delta"): - deltas = yield self.store.get_current_state_deltas(self.pos) - if not deltas: + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: return + logger.debug( + "Processing user stats %s->%s", self.pos, room_max_stream_ordering + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self.pos, room_max_stream_ordering + ) + logger.info("Handling %d state deltas", len(deltas)) yield self._handle_deltas(deltas) - self.pos = deltas[-1]["stream_id"] + self.pos = max_pos # Expose current event processing position to prometheus synapse.metrics.event_processing_positions.labels("user_dir").set( - self.pos + max_pos ) - yield self.store.update_user_directory_stream_pos(self.pos) + yield self.store.update_user_directory_stream_pos(max_pos) @defer.inlineCallbacks def _handle_deltas(self, deltas): diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py index 5fdb442104..28f33ec18f 100644 --- a/synapse/storage/state_deltas.py +++ b/synapse/storage/state_deltas.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) class StateDeltasStore(SQLBaseStore): - def get_current_state_deltas(self, prev_stream_id): + def get_current_state_deltas(self, prev_stream_id: int, max_stream_id: int): """Fetch a list of room state changes since the given stream id Each entry in the result contains the following fields: @@ -36,15 +36,27 @@ class StateDeltasStore(SQLBaseStore): Args: prev_stream_id (int): point to get changes since (exclusive) + max_stream_id (int): the point that we know has been correctly persisted + - ie, an upper limit to return changes from. Returns: - Deferred[list[dict]]: results + Deferred[tuple[int, list[dict]]: A tuple consisting of: + - the stream id which these results go up to + - list of current_state_delta_stream rows. If it is empty, we are + up to date. """ prev_stream_id = int(prev_stream_id) + + # check we're not going backwards + assert prev_stream_id <= max_stream_id + if not self._curr_state_delta_stream_cache.has_any_entity_changed( prev_stream_id ): - return [] + # if the CSDs haven't changed between prev_stream_id and now, we + # know for certain that they haven't changed between prev_stream_id and + # max_stream_id. + return max_stream_id, [] def get_current_state_deltas_txn(txn): # First we calculate the max stream id that will give us less than @@ -54,21 +66,29 @@ class StateDeltasStore(SQLBaseStore): sql = """ SELECT stream_id, count(*) FROM current_state_delta_stream - WHERE stream_id > ? + WHERE stream_id > ? AND stream_id <= ? GROUP BY stream_id ORDER BY stream_id ASC LIMIT 100 """ - txn.execute(sql, (prev_stream_id,)) + txn.execute(sql, (prev_stream_id, max_stream_id)) total = 0 - max_stream_id = prev_stream_id - for max_stream_id, count in txn: + + for stream_id, count in txn: total += count if total > 100: # We arbitarily limit to 100 entries to ensure we don't # select toooo many. 
+ logger.debug( + "Clipping current_state_delta_stream rows to stream_id %i", + stream_id, + ) + clipped_stream_id = stream_id break + else: + # if there's no problem, we may as well go right up to the max_stream_id + clipped_stream_id = max_stream_id # Now actually get the deltas sql = """ @@ -77,8 +97,8 @@ class StateDeltasStore(SQLBaseStore): WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC """ - txn.execute(sql, (prev_stream_id, max_stream_id)) - return self.cursor_to_dict(txn) + txn.execute(sql, (prev_stream_id, clipped_stream_id)) + return clipped_stream_id, self.cursor_to_dict(txn) return self.runInteraction( "get_current_state_deltas", get_current_state_deltas_txn diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 1f2ef5d01f..67f1013051 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -139,7 +139,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): defer.succeed(1) ) - self.datastore.get_current_state_deltas.return_value = None + self.datastore.get_current_state_deltas.return_value = (0, None) self.datastore.get_to_device_stream_token = lambda: 0 self.datastore.get_new_device_msgs_for_remote = lambda *args, **kargs: ([], 0) diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index 5877bb2133..d3a4f717f7 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -62,7 +62,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): self.device_handler.check_device_registered = Mock(return_value="FAKE") self.datastore = Mock(return_value=Mock()) - self.datastore.get_current_state_deltas = Mock(return_value=[]) + self.datastore.get_current_state_deltas = Mock(return_value=(0, [])) self.secrets = Mock() -- cgit 1.4.1 From 791a8c559bf4ea984637c047fad7d6097e34ce99 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 11:53:57 +0100 Subject: Add coments --- synapse/util/patch_inline_callbacks.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 5ef7190b14..b518dae256 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -107,6 +107,19 @@ def do_patch(): def _check_yield_points(f, changes, start_context): """Wraps a generator that is about to be passed to defer.inlineCallbacks checking that after every yield the log contexts are correct. + + Its perfectly valid for log contexts to change within a function, e.g. due + to new Measure blocks, so such changes are added to the given `changes` + list instead of triggering an exception. + + Args: + f: generator function to wrap + changes (list[str]): A list of strings detailing how the contexts + changed within a function. + start_context (LoggingContext): The initial context we're expecting + + Returns: + function """ from synapse.logging.context import LoggingContext @@ -131,6 +144,10 @@ def _check_yield_points(f, changes, start_context): # This happens when the context is lost sometime *after* the # final yield and returning. E.g. we forgot to yield on a # function that returns a deferred. + # + # We don't raise here as its perfectly valid for contexts to + # change in a function, as long as it sets the correct context + # on resolving (which is checked separately). 
err = ( "Function %r returned and changed context from %s to %s," " in %s between %d and end of func" @@ -143,14 +160,14 @@ def _check_yield_points(f, changes, start_context): ) ) changes.append(err) - # raise Exception(err) return getattr(e, "value", None) frame = gen.gi_frame if isinstance(d, defer.Deferred) and not d.called: # This happens if we yield on a deferred that doesn't follow - # the log context rules without wrappin in a `make_deferred_yieldable` + # the log context rules without wrappin in a `make_deferred_yieldable`. + # We raise here as this should never happen. if LoggingContext.current_context() is not LoggingContext.sentinel: err = ( "%s yielded with context %s rather than sentinel," @@ -162,8 +179,7 @@ def _check_yield_points(f, changes, start_context): frame.f_code.co_filename, ) ) - changes.append(err) - # raise Exception(err) + raise Exception(err) try: result = yield d @@ -171,10 +187,15 @@ def _check_yield_points(f, changes, start_context): result = Failure(e) if LoggingContext.current_context() != expected_context: + # This happens because the context is lost sometime *after* the # previous yield and *after* the current yield. E.g. the # deferred we waited on didn't follow the rules, or we forgot to # yield on a function between the two yield points. + # + # We don't raise here as its perfectly valid for contexts to + # change in a function, as long as it sets the correct context + # on resolving (which is checked separately). err = ( "%s changed context from %s to %s, happened between lines %d and %d in %s" % ( @@ -187,7 +208,6 @@ def _check_yield_points(f, changes, start_context): ) ) changes.append(err) - # raise Exception(err) expected_context = LoggingContext.current_context() -- cgit 1.4.1 From 941edad58355a829b49b0a43d382bbb0bf9ba021 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 12:15:17 +0100 Subject: Appease mypy --- synapse/util/patch_inline_callbacks.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index b518dae256..64a2c891c3 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -18,11 +18,17 @@ from __future__ import print_function import functools import sys +from typing import List, Callable, Any + from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.python.failure import Failure +# Tracks if we've already patched inlineCallbacks +_already_patched = False + + def do_patch(): """ Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit @@ -30,16 +36,18 @@ def do_patch(): from synapse.logging.context import LoggingContext + global _already_patched + orig_inline_callbacks = defer.inlineCallbacks - if hasattr(orig_inline_callbacks, "patched_by_synapse"): + if _already_patched: return def new_inline_callbacks(f): @functools.wraps(f) def wrapped(*args, **kwargs): start_context = LoggingContext.current_context() - changes = [] - orig = orig_inline_callbacks(_check_yield_points(f, changes, start_context)) + changes: List[str] = [] + orig = orig_inline_callbacks(_check_yield_points(f, changes)) try: res = orig(*args, **kwargs) @@ -101,10 +109,10 @@ def do_patch(): return wrapped defer.inlineCallbacks = new_inline_callbacks - new_inline_callbacks.patched_by_synapse = True + _already_patched = True -def _check_yield_points(f, changes, start_context): +def 
_check_yield_points(f: Callable, changes: List[str]): """Wraps a generator that is about to be passed to defer.inlineCallbacks checking that after every yield the log contexts are correct. @@ -114,9 +122,8 @@ def _check_yield_points(f, changes, start_context): Args: f: generator function to wrap - changes (list[str]): A list of strings detailing how the contexts + changes: A list of strings detailing how the contexts changed within a function. - start_context (LoggingContext): The initial context we're expecting Returns: function @@ -126,13 +133,13 @@ def _check_yield_points(f, changes, start_context): @functools.wraps(f) def check_yield_points_inner(*args, **kwargs): - expected_context = start_context - gen = f(*args, **kwargs) last_yield_line_no = gen.gi_frame.f_lineno - result = None + result: Any = None while True: + expected_context = LoggingContext.current_context() + try: isFailure = isinstance(result, Failure) if isFailure: @@ -200,7 +207,7 @@ def _check_yield_points(f, changes, start_context): "%s changed context from %s to %s, happened between lines %d and %d in %s" % ( frame.f_code.co_name, - start_context, + expected_context, LoggingContext.current_context(), last_yield_line_no, frame.f_lineno, @@ -209,8 +216,6 @@ def _check_yield_points(f, changes, start_context): ) changes.append(err) - expected_context = LoggingContext.current_context() - last_yield_line_no = frame.f_lineno return check_yield_points_inner -- cgit 1.4.1 From f735aeec65a5117c71cf0f1e5f61cb900683533a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 12:20:29 +0100 Subject: sort --- synapse/util/patch_inline_callbacks.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 64a2c891c3..3b78451dc0 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -17,14 +17,12 @@ from __future__ import print_function import functools import sys - -from typing import List, Callable, Any +from typing import Any, Callable, List from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.python.failure import Failure - # Tracks if we've already patched inlineCallbacks _already_patched = False -- cgit 1.4.1 From c349e3ebafbe044022b93ca5c04d8b2fcb640c0a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 12:29:38 +0100 Subject: Fix py3.5 --- synapse/util/patch_inline_callbacks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 3b78451dc0..812dc88835 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -44,7 +44,7 @@ def do_patch(): @functools.wraps(f) def wrapped(*args, **kwargs): start_context = LoggingContext.current_context() - changes: List[str] = [] + changes = [] # type: List[str] orig = orig_inline_callbacks(_check_yield_points(f, changes)) try: -- cgit 1.4.1 From 59e0ed83065874775be350e25bb9f87da67b87c2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 12:47:07 +0100 Subject: Fix py3.5 --- synapse/util/patch_inline_callbacks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 812dc88835..66c3d47519 100644 --- a/synapse/util/patch_inline_callbacks.py +++ 
b/synapse/util/patch_inline_callbacks.py @@ -134,7 +134,7 @@ def _check_yield_points(f: Callable, changes: List[str]): gen = f(*args, **kwargs) last_yield_line_no = gen.gi_frame.f_lineno - result: Any = None + result = None # type: Any while True: expected_context = LoggingContext.current_context() -- cgit 1.4.1 From 2efd050c9db2e96fd96535dc9b1c6f54acbd163d Mon Sep 17 00:00:00 2001 From: krombel Date: Thu, 10 Oct 2019 13:59:55 +0200 Subject: send 404 as http-status when filter-id is unknown to the server (#2380) This fixed the weirdness of 400 vs 404 as http status code in the case the filter id is not known by the server. As e.g. matrix-js-sdk expects 404 to catch this situation this leads to unwanted behaviour. --- changelog.d/2380.bugfix | 1 + synapse/rest/client/v2_alpha/filter.py | 12 +++++---- synapse/rest/client/v2_alpha/sync.py | 41 ++++++++++++++++++------------- tests/rest/client/v2_alpha/test_filter.py | 2 +- 4 files changed, 33 insertions(+), 23 deletions(-) create mode 100644 changelog.d/2380.bugfix (limited to 'synapse') diff --git a/changelog.d/2380.bugfix b/changelog.d/2380.bugfix new file mode 100644 index 0000000000..eae3206031 --- /dev/null +++ b/changelog.d/2380.bugfix @@ -0,0 +1 @@ +Return an HTTP 404 instead of 400 when requesting a filter by ID that is unknown to the server. Thanks to @krombel for contributing this! diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py index c6ddf24c8d..17a8bc7366 100644 --- a/synapse/rest/client/v2_alpha/filter.py +++ b/synapse/rest/client/v2_alpha/filter.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.errors import AuthError, Codes, StoreError, SynapseError +from synapse.api.errors import AuthError, NotFoundError, StoreError, SynapseError from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.types import UserID @@ -52,13 +52,15 @@ class GetFilterRestServlet(RestServlet): raise SynapseError(400, "Invalid filter_id") try: - filter = yield self.filtering.get_user_filter( + filter_collection = yield self.filtering.get_user_filter( user_localpart=target_user.localpart, filter_id=filter_id ) + except StoreError as e: + if e.code != 404: + raise + raise NotFoundError("No such filter") - return 200, filter.get_filter_json() - except (KeyError, StoreError): - raise SynapseError(400, "No such filter", errcode=Codes.NOT_FOUND) + return 200, filter_collection.get_filter_json() class CreateFilterRestServlet(RestServlet): diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index c98c5a3802..a883c8adda 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -21,7 +21,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import PresenceState -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, StoreError, SynapseError from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection from synapse.events.utils import ( format_event_for_client_v2_without_room_id, @@ -119,25 +119,32 @@ class SyncRestServlet(RestServlet): request_key = (user, timeout, since, filter_id, full_state, device_id) - if filter_id: - if filter_id.startswith("{"): - try: - filter_object = json.loads(filter_id) - set_timeline_upper_limit( - filter_object, self.hs.config.filter_timeline_limit - ) - except Exception: - raise SynapseError(400, "Invalid filter JSON") - 
self.filtering.check_valid_filter(filter_object) - filter = FilterCollection(filter_object) - else: - filter = yield self.filtering.get_user_filter(user.localpart, filter_id) + if filter_id is None: + filter_collection = DEFAULT_FILTER_COLLECTION + elif filter_id.startswith("{"): + try: + filter_object = json.loads(filter_id) + set_timeline_upper_limit( + filter_object, self.hs.config.filter_timeline_limit + ) + except Exception: + raise SynapseError(400, "Invalid filter JSON") + self.filtering.check_valid_filter(filter_object) + filter_collection = FilterCollection(filter_object) else: - filter = DEFAULT_FILTER_COLLECTION + try: + filter_collection = yield self.filtering.get_user_filter( + user.localpart, filter_id + ) + except StoreError as err: + if err.code != 404: + raise + # fix up the description and errcode to be more useful + raise SynapseError(400, "No such filter", errcode=Codes.INVALID_PARAM) sync_config = SyncConfig( user=user, - filter_collection=filter, + filter_collection=filter_collection, is_guest=requester.is_guest, request_key=request_key, device_id=device_id, @@ -171,7 +178,7 @@ class SyncRestServlet(RestServlet): time_now = self.clock.time_msec() response_content = yield self.encode_response( - time_now, sync_result, requester.access_token_id, filter + time_now, sync_result, requester.access_token_id, filter_collection ) return 200, response_content diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index f42a8efbf4..e0e9e94fbf 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -92,7 +92,7 @@ class FilterTestCase(unittest.HomeserverTestCase): ) self.render(request) - self.assertEqual(channel.result["code"], b"400") + self.assertEqual(channel.result["code"], b"404") self.assertEquals(channel.json_body["errcode"], Codes.NOT_FOUND) # Currently invalid params do not have an appropriate errcode -- cgit 1.4.1 From 9a84d74417a1c9fbcd6c57e7ef23e5590e04ef49 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Thu, 10 Oct 2019 13:03:44 +0100 Subject: before fulfilling a group invite,check if user is already joined/invited (#3436) Fixes vector-im/riot-web#5645 --- changelog.d/3436.bugfix | 1 + synapse/groups/groups_server.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 changelog.d/3436.bugfix (limited to 'synapse') diff --git a/changelog.d/3436.bugfix b/changelog.d/3436.bugfix new file mode 100644 index 0000000000..15714a11e0 --- /dev/null +++ b/changelog.d/3436.bugfix @@ -0,0 +1 @@ +Fix a problem where users could be invited twice to the same group. diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index d50e691436..8f10b6adbb 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2017 Vector Creations Ltd # Copyright 2018 New Vector Ltd +# Copyright 2019 Michael Telatynski <7t3chguy@gmail.com> # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,16 +21,16 @@ from six import string_types from twisted.internet import defer -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError from synapse.types import GroupID, RoomID, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute logger = logging.getLogger(__name__) -# TODO: Allow users to "knock" or simpkly join depending on rules +# TODO: Allow users to "knock" or simply join depending on rules # TODO: Federation admin APIs -# TODO: is_priveged flag to users and is_public to users and rooms +# TODO: is_privileged flag to users and is_public to users and rooms # TODO: Audit log for admins (profile updates, membership changes, users who tried # to join but were rejected, etc) # TODO: Flairs @@ -590,7 +591,18 @@ class GroupsServerHandler(object): ) # TODO: Check if user knocked - # TODO: Check if user is already invited + + invited_users = yield self.store.get_invited_users_in_group(group_id) + if user_id in invited_users: + raise SynapseError( + 400, "User already invited to group", errcode=Codes.BAD_STATE + ) + + user_results = yield self.store.get_users_in_group( + group_id, include_private=True + ) + if user_id in [user_result["user_id"] for user_result in user_results]: + raise SynapseError(400, "User already in group") content = { "profile": {"name": group["name"], "avatar_url": group["avatar_url"]}, -- cgit 1.4.1 From b5b03b7079a9baa34a25915d6a569e383e8307c3 Mon Sep 17 00:00:00 2001 From: werner291 Date: Thu, 10 Oct 2019 14:05:48 +0200 Subject: Add domain validation when creating room with list of invitees (#6121) --- changelog.d/4088.bugfix | 1 + synapse/handlers/room.py | 4 +++- tests/rest/client/v1/test_rooms.py | 9 +++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4088.bugfix (limited to 'synapse') diff --git a/changelog.d/4088.bugfix b/changelog.d/4088.bugfix new file mode 100644 index 0000000000..61722b6224 --- /dev/null +++ b/changelog.d/4088.bugfix @@ -0,0 +1 @@ +Added domain validation when including a list of invitees upon room creation. 
\ No newline at end of file diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 970be3c846..2816bd8f87 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -28,6 +28,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules, RoomCreationPreset from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.http.endpoint import parse_and_validate_server_name from synapse.storage.state import StateFilter from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID from synapse.util import stringutils @@ -554,7 +555,8 @@ class RoomCreationHandler(BaseHandler): invite_list = config.get("invite", []) for i in invite_list: try: - UserID.from_string(i) + uid = UserID.from_string(i) + parse_and_validate_server_name(uid.domain) except Exception: raise SynapseError(400, "Invalid user_id: %s" % (i,)) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index fe741637f5..2f2ca74611 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -484,6 +484,15 @@ class RoomsCreateTestCase(RoomBase): self.render(request) self.assertEquals(400, channel.code) + def test_post_room_invitees_invalid_mxid(self): + # POST with invalid invitee, see https://github.com/matrix-org/synapse/issues/4088 + # Note the trailing space in the MXID here! + request, channel = self.make_request( + "POST", "/createRoom", b'{"invite":["@alice:example.com "]}' + ) + self.render(request) + self.assertEquals(400, channel.code) + class RoomTopicTestCase(RoomBase): """ Tests /rooms/$room_id/topic REST events. """ -- cgit 1.4.1 From b4fbf71187545748edf3ebd931b49350e5b1ca74 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 19:06:12 +0100 Subject: Add helper funcs to use postgres ANY This means that we can write queries with `col = ANY(?)`, which helps postgres. --- synapse/storage/_base.py | 64 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 8 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abe16334ec..a94cbc27d3 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -20,6 +20,7 @@ import random import sys import threading import time +from typing import Iterable, List, Tuple from six import PY2, iteritems, iterkeys, itervalues from six.moves import builtins, intern, range @@ -1162,19 +1163,20 @@ class SQLBaseStore(object): if not iterable: return [] - sql = "SELECT %s FROM %s" % (", ".join(retcols), table) - clauses = [] values = [] - clauses.append("%s IN (%s)" % (column, ",".join("?" for _ in iterable))) - values.extend(iterable) + + add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values) for key, value in iteritems(keyvalues): clauses.append("%s = ?" % (key,)) values.append(value) - if clauses: - sql = "%s WHERE %s" % (sql, " AND ".join(clauses)) + sql = "SELECT %s FROM %s WHERE %s" % ( + ", ".join(retcols), + table, + " AND ".join(clauses), + ) txn.execute(sql, values) return cls.cursor_to_dict(txn) @@ -1325,8 +1327,8 @@ class SQLBaseStore(object): clauses = [] values = [] - clauses.append("%s IN (%s)" % (column, ",".join("?" for _ in iterable))) - values.extend(iterable) + + add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values) for key, value in iteritems(keyvalues): clauses.append("%s = ?" 
% (key,)) @@ -1693,3 +1695,49 @@ def db_to_json(db_content): except Exception: logging.warning("Tried to decode '%r' as JSON and failed", db_content) raise + + +def add_in_list_sql_clause( + database_engine, column: str, iterable: Iterable, clauses: List[str], args: List +): + """Adds an SQL clause to the given list of clauses/args that checks the + given column is in the iterable. c.f. `make_in_list_sql_clause` + + Args: + database_engine + column: Name of the column + iterable: The values to check the column against. + clauses: A list to add the expanded clause to + args: A list of arguments that we append the args to. + """ + + clause, new_args = make_in_list_sql_clause(database_engine, column, iterable) + clauses.append(clause) + args.extend(new_args) + + +def make_in_list_sql_clause( + database_engine, column: str, iterable: Iterable +) -> Tuple[str, Iterable]: + """Returns an SQL clause that checks the given column is in the iterable. + + On SQLite this expands to `column IN (?, ?, ...)`, whereas on Postgres + it expands to `column = ANY(?)`. While both DBs support the `IN` form, + using the `ANY` form on postgres means that it views queries with + different length iterables as the same, helping the query stats. + + Args: + database_engine + column: Name of the column + iterable: The values to check the column against. + + Returns: + A tuple of SQL query and the args + """ + + if isinstance(database_engine, PostgresEngine): + # This should hopefully be faster, but also makes postgres query + # stats easier to understand. + return "%s = ANY(?)" % (column,), [list(iterable)] + else: + return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), iterable -- cgit 1.4.1 From b161786c1455f219d58549b514f2551f25eae33a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 19:07:07 +0100 Subject: Replace IN usage with helper funcs --- synapse/storage/deviceinbox.py | 14 ++++----- synapse/storage/devices.py | 14 +++++++-- synapse/storage/event_federation.py | 9 ++++-- synapse/storage/events.py | 44 ++++++++++++++++++----------- synapse/storage/events_bg_updates.py | 12 ++++---- synapse/storage/events_worker.py | 28 ++++++++++-------- synapse/storage/presence.py | 14 ++++----- synapse/storage/receipts.py | 53 ++++++++++++++++++++--------------- synapse/storage/roommember.py | 18 ++++++++---- synapse/storage/search.py | 11 +++++--- synapse/storage/user_erasure_store.py | 18 ++++++------ 11 files changed, 138 insertions(+), 97 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index 70bc2bb2cc..f04aad0743 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -20,7 +20,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.logging.opentracing import log_kv, set_tag, trace -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.expiringcache import ExpiringCache @@ -378,15 +378,15 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) else: if not devices: continue - sql = ( - "SELECT device_id FROM devices" - " WHERE user_id = ? AND device_id IN (" - + ",".join("?" * len(devices)) - + ")" + + clause, args = make_in_list_sql_clause( + txn.database_engine, "device_id", devices ) + sql = "SELECT device_id FROM devices WHERE user_id = ? 
AND " + clause + # TODO: Maybe this needs to be done in batches if there are # too many local devices for a given user. - txn.execute(sql, [user_id] + devices) + txn.execute(sql, [user_id] + list(args)) for row in txn: # Only insert into the local inbox if the device exists on # this server diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 111bfb3d64..ac5239e50a 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -28,7 +28,12 @@ from synapse.logging.opentracing import ( whitelisted_homeserver, ) from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage._base import Cache, SQLBaseStore, db_to_json +from synapse.storage._base import ( + Cache, + SQLBaseStore, + db_to_json, + make_in_list_sql_clause, +) from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList @@ -448,11 +453,14 @@ class DeviceWorkerStore(SQLBaseStore): sql = """ SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? - AND user_id IN (%s) + AND """ for chunk in batch_iter(to_check, 100): - txn.execute(sql % (",".join("?" for _ in chunk),), (from_key,) + chunk) + clause, args = make_in_list_sql_clause( + txn.database_engine, "user_id", chunk + ) + txn.execute(sql + clause, (from_key,) + tuple(args)) changes.update(user_id for user_id, in txn) return changes diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index f5e8c39262..47cc10d32a 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -25,7 +25,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.signatures import SignatureWorkerStore from synapse.util.caches.descriptors import cached @@ -68,7 +68,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas else: results = set() - base_sql = "SELECT auth_id FROM event_auth WHERE event_id IN (%s)" + base_sql = "SELECT auth_id FROM event_auth WHERE " front = set(event_ids) while front: @@ -76,7 +76,10 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas front_list = list(front) chunks = [front_list[x : x + 100] for x in range(0, len(front), 100)] for chunk in chunks: - txn.execute(base_sql % (",".join(["?"] * len(chunk)),), chunk) + clause, args = make_in_list_sql_clause( + txn.database_engine, "event_id", chunk + ) + txn.execute(base_sql + clause, list(args)) new_front.update([r[0] for r in txn]) new_front -= results diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bb6ff0595a..ee49ef235d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -39,6 +39,7 @@ from synapse.logging.utils import log_function from synapse.metrics import BucketCollector from synapse.metrics.background_process_metrics import run_as_background_process from synapse.state import StateResolutionStore +from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.events_worker import 
EventsWorkerStore @@ -641,14 +642,16 @@ class EventsStore( LEFT JOIN rejections USING (event_id) LEFT JOIN event_json USING (event_id) WHERE - prev_event_id IN (%s) - AND NOT events.outlier + NOT events.outlier AND rejections.event_id IS NULL - """ % ( - ",".join("?" for _ in batch), + AND + """ + + clause, args = make_in_list_sql_clause( + self.database_engine, "prev_event_id", batch ) - txn.execute(sql, batch) + txn.execute(sql + clause, args) results.extend(r[0] for r in txn if not json.loads(r[1]).get("soft_failed")) for chunk in batch_iter(event_ids, 100): @@ -695,13 +698,15 @@ class EventsStore( LEFT JOIN rejections USING (event_id) LEFT JOIN event_json USING (event_id) WHERE - event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" for _ in to_recursively_check), + NOT events.outlier + AND + """ + + clause, args = make_in_list_sql_clause( + self.database_engine, "event_id", to_recursively_check ) - txn.execute(sql, to_recursively_check) + txn.execute(sql + clause, args) to_recursively_check = [] for event_id, prev_event_id, metadata, rejected in txn: @@ -1543,10 +1548,14 @@ class EventsStore( " FROM events as e" " LEFT JOIN rejections as rej USING (event_id)" " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(ev_map)),) + " WHERE " + ) + + clause, args = make_in_list_sql_clause( + self.database_engine, "e.event_id", list(ev_map) + ) - txn.execute(sql, list(ev_map)) + txn.execute(sql + clause, args) rows = self.cursor_to_dict(txn) for row in rows: event = ev_map[row["event_id"]] @@ -2249,11 +2258,12 @@ class EventsStore( sql = """ SELECT DISTINCT state_group FROM event_to_state_groups LEFT JOIN events_to_purge AS ep USING (event_id) - WHERE state_group IN (%s) AND ep.event_id IS NULL - """ % ( - ",".join("?" for _ in current_search), + WHERE ep.event_id IS NULL AND + """ + clause, args = make_in_list_sql_clause( + txn.database_engine, "state_group", current_search ) - txn.execute(sql, list(current_search)) + txn.execute(sql + clause, list(args)) referenced = set(sg for sg, in txn) referenced_groups |= referenced diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 5717baf48c..97728a6da2 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -21,6 +21,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore logger = logging.getLogger(__name__) @@ -312,12 +313,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): INNER JOIN event_json USING (event_id) LEFT JOIN rejections USING (event_id) WHERE - prev_event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" 
for _ in to_check), + NOT events.outlier + AND + """ + clause, args = make_in_list_sql_clause( + self.database_engine, "prev_event_id", to_check ) - txn.execute(sql, to_check) + txn.execute(sql + clause, list(args)) for prev_event_id, event_id, metadata, rejected in txn: if event_id in graph: diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 57ce0304e9..4c4b76bd93 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -31,12 +31,11 @@ from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.types import get_domain_from_id from synapse.util import batch_iter from synapse.util.metrics import Measure -from ._base import SQLBaseStore - logger = logging.getLogger(__name__) @@ -623,10 +622,14 @@ class EventsWorkerStore(SQLBaseStore): " rej.reason " " FROM event_json as e" " LEFT JOIN rejections as rej USING (event_id)" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) + " WHERE " + ) - txn.execute(sql, evs) + clause, args = make_in_list_sql_clause( + txn.database_engine, "e.event_id", evs + ) + + txn.execute(sql + clause, args) for row in txn: event_id = row[0] @@ -640,11 +643,11 @@ class EventsWorkerStore(SQLBaseStore): } # check for redactions - redactions_sql = ( - "SELECT event_id, redacts FROM redactions WHERE redacts IN (%s)" - ) % (",".join(["?"] * len(evs)),) + redactions_sql = "SELECT event_id, redacts FROM redactions WHERE " + + clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs) - txn.execute(redactions_sql, evs) + txn.execute(redactions_sql + clause, args) for (redacter, redacted) in txn: d = event_dict.get(redacted) @@ -753,10 +756,11 @@ class EventsWorkerStore(SQLBaseStore): results = set() def have_seen_events_txn(txn, chunk): - sql = "SELECT event_id FROM events as e WHERE e.event_id IN (%s)" % ( - ",".join("?" * len(chunk)), + sql = "SELECT event_id FROM events as e WHERE " + clause, args = make_in_list_sql_clause( + txn.database_engine, "e.event_id", chunk ) - txn.execute(sql, chunk) + txn.execute(sql + clause, args) for (event_id,) in txn: results.add(event_id) diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index 5db6f2d84a..3a641f538b 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -18,11 +18,10 @@ from collections import namedtuple from twisted.internet import defer from synapse.api.constants import PresenceState +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedList -from ._base import SQLBaseStore - class UserPresenceState( namedtuple( @@ -119,14 +118,13 @@ class PresenceStore(SQLBaseStore): ) # Delete old rows to stop database from getting really big - sql = ( - "DELETE FROM presence_stream WHERE" " stream_id < ?" " AND user_id IN (%s)" - ) + sql = "DELETE FROM presence_stream WHERE stream_id < ? AND " for states in batch_iter(presence_states, 50): - args = [stream_id] - args.extend(s.user_id for s in states) - txn.execute(sql % (",".join("?" 
for _ in states),), args) + clause, args = make_in_list_sql_clause( + self.database_engine, "user_id", [s.user_id for s in states] + ) + txn.execute(sql + clause, [stream_id] + list(args)) def get_all_presence_updates(self, last_id, current_id): if last_id == current_id: diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 290ddb30e8..0c24430f28 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,12 +21,11 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import SQLBaseStore -from .util.id_generators import StreamIdGenerator - logger = logging.getLogger(__name__) @@ -217,24 +216,26 @@ class ReceiptsWorkerStore(SQLBaseStore): def f(txn): if from_key: - sql = ( - "SELECT * FROM receipts_linearized WHERE" - " room_id IN (%s) AND stream_id > ? AND stream_id <= ?" - ) % (",".join(["?"] * len(room_ids))) - args = list(room_ids) - args.extend([from_key, to_key]) + sql = """ + SELECT * FROM receipts_linearized WHERE + stream_id > ? AND stream_id <= ? AND + """ + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids + ) - txn.execute(sql, args) + txn.execute(sql + clause, [from_key, to_key] + list(args)) else: - sql = ( - "SELECT * FROM receipts_linearized WHERE" - " room_id IN (%s) AND stream_id <= ?" - ) % (",".join(["?"] * len(room_ids))) + sql = """ + SELECT * FROM receipts_linearized WHERE + stream_id <= ? AND + """ - args = list(room_ids) - args.append(to_key) + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids + ) - txn.execute(sql, args) + txn.execute(sql + clause, [to_key] + list(args)) return self.cursor_to_dict(txn) @@ -433,13 +434,19 @@ class ReceiptsStore(ReceiptsWorkerStore): # we need to points in graph -> linearized form. # TODO: Make this better. def graph_to_linear(txn): - query = ( - "SELECT event_id WHERE room_id = ? AND stream_ordering IN (" - " SELECT max(stream_ordering) WHERE event_id IN (%s)" - ")" - ) % (",".join(["?"] * len(event_ids))) + clause, args = make_in_list_sql_clause( + self.database_engine, "event_id", event_ids + ) + + sql = """ + SELECT event_id WHERE room_id = ? 
AND stream_ordering IN ( + SELECT max(stream_ordering) WHERE %s + ) + """ % ( + clause, + ) - txn.execute(query, [room_id] + event_ids) + txn.execute(sql, [room_id] + list(args)) rows = txn.fetchall() if rows: return rows[0][0] diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4e606a8380..ff63487823 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -26,7 +26,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage._base import LoggingTransaction +from synapse.storage._base import LoggingTransaction, make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import Sqlite3Engine from synapse.storage.events_worker import EventsWorkerStore @@ -372,6 +372,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): results = [] if membership_list: if self._current_state_events_membership_up_to_date: + clause, args = make_in_list_sql_clause( + self.database_engine, "c.membership", membership_list + ) sql = """ SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering FROM current_state_events AS c @@ -379,11 +382,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): WHERE c.type = 'm.room.member' AND state_key = ? - AND c.membership IN (%s) + AND %s """ % ( - ",".join("?" * len(membership_list)) + clause, ) else: + clause, args = make_in_list_sql_clause( + self.database_engine, "m.membership", membership_list + ) sql = """ SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering FROM current_state_events AS c @@ -392,12 +398,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): WHERE c.type = 'm.room.member' AND state_key = ? - AND m.membership IN (%s) + AND %s """ % ( - ",".join("?" * len(membership_list)) + clause, ) - txn.execute(sql, (user_id, *membership_list)) + txn.execute(sql, (user_id, *args)) results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)] if do_invite: diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 6ba4190f1a..4be6e56dfa 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -24,6 +24,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import SynapseError +from synapse.storage._base import add_in_list_sql_clause from synapse.storage.engines import PostgresEngine, Sqlite3Engine from .background_updates import BackgroundUpdateStore @@ -385,8 +386,9 @@ class SearchStore(SearchBackgroundUpdateStore): # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. if len(room_ids) < 500: - clauses.append("room_id IN (%s)" % (",".join(["?"] * len(room_ids)),)) - args.extend(room_ids) + add_in_list_sql_clause( + self.database_engine, "room_id", room_ids, clauses, args + ) local_clauses = [] for key in keys: @@ -492,8 +494,9 @@ class SearchStore(SearchBackgroundUpdateStore): # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. 
if len(room_ids) < 500: - clauses.append("room_id IN (%s)" % (",".join(["?"] * len(room_ids)),)) - args.extend(room_ids) + add_in_list_sql_clause( + self.database_engine, "room_id", room_ids, clauses, args + ) local_clauses = [] for key in keys: diff --git a/synapse/storage/user_erasure_store.py b/synapse/storage/user_erasure_store.py index 05cabc2282..aa4f0da5f0 100644 --- a/synapse/storage/user_erasure_store.py +++ b/synapse/storage/user_erasure_store.py @@ -56,15 +56,15 @@ class UserErasureWorkerStore(SQLBaseStore): # iterate it multiple times, and (b) avoiding duplicates. user_ids = tuple(set(user_ids)) - def _get_erased_users(txn): - txn.execute( - "SELECT user_id FROM erased_users WHERE user_id IN (%s)" - % (",".join("?" * len(user_ids))), - user_ids, - ) - return set(r[0] for r in txn) - - erased_users = yield self.runInteraction("are_users_erased", _get_erased_users) + rows = yield self._simple_select_many_batch( + table="erased_users", + column="user_id", + iterable=user_ids, + retcols=("user_id",), + desc="are_users_erased", + ) + erased_users = set(row["user_id"] for row in rows) + res = dict((u, u in erased_users) for u in user_ids) return res -- cgit 1.4.1 From fe1c1e6c28e09f88b30e0587161f9b1dbd6e8acf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 13:17:19 +0100 Subject: Fixup comments Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/patch_inline_callbacks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index 66c3d47519..3925927f9f 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -114,7 +114,7 @@ def _check_yield_points(f: Callable, changes: List[str]): """Wraps a generator that is about to be passed to defer.inlineCallbacks checking that after every yield the log contexts are correct. - Its perfectly valid for log contexts to change within a function, e.g. due + It's perfectly valid for log contexts to change within a function, e.g. due to new Measure blocks, so such changes are added to the given `changes` list instead of triggering an exception. @@ -150,7 +150,7 @@ def _check_yield_points(f: Callable, changes: List[str]): # final yield and returning. E.g. we forgot to yield on a # function that returns a deferred. # - # We don't raise here as its perfectly valid for contexts to + # We don't raise here as it's perfectly valid for contexts to # change in a function, as long as it sets the correct context # on resolving (which is checked separately). err = ( @@ -171,7 +171,7 @@ def _check_yield_points(f: Callable, changes: List[str]): if isinstance(d, defer.Deferred) and not d.called: # This happens if we yield on a deferred that doesn't follow - # the log context rules without wrappin in a `make_deferred_yieldable`. + # the log context rules without wrapping in a `make_deferred_yieldable`. # We raise here as this should never happen. 
if LoggingContext.current_context() is not LoggingContext.sentinel: err = ( -- cgit 1.4.1 From ca3e01e50d2caca6a55b7c7808f0e948b430363d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 14:52:29 +0100 Subject: Fix store_url_cache using bytes --- synapse/rest/media/v1/preview_url_resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 7a56cd4b6c..0c68c3aad5 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -270,7 +270,7 @@ class PreviewUrlResource(DirectServeResource): logger.debug("Calculated OG for %s as %s" % (url, og)) - jsonog = json.dumps(og).encode("utf8") + jsonog = json.dumps(og) # store OG in history-aware DB cache yield self.store.store_url_cache( @@ -283,7 +283,7 @@ class PreviewUrlResource(DirectServeResource): media_info["created_ts"], ) - return jsonog + return jsonog.encode("utf8") @defer.inlineCallbacks def _download_url(self, url, user): -- cgit 1.4.1 From 3bc687508fa6c4cf82b5ddb22ce6f3674433d0ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 15:35:46 +0100 Subject: Remove add_in_list_sql_clause --- synapse/storage/_base.py | 35 ++++++----------------------------- synapse/storage/engines/postgres.py | 6 ++++++ synapse/storage/engines/sqlite.py | 6 ++++++ synapse/storage/search.py | 12 +++++++----- 4 files changed, 25 insertions(+), 34 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 085b8ae871..6176838aa6 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -20,7 +20,7 @@ import random import sys import threading import time -from typing import Iterable, List, Tuple +from typing import Iterable, Tuple from six import PY2, iteritems, iterkeys, itervalues from six.moves import builtins, intern, range @@ -1164,10 +1164,8 @@ class SQLBaseStore(object): if not iterable: return [] - clauses = [] - values = [] - - add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values) + clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable) + clauses = [clause] for key, value in iteritems(keyvalues): clauses.append("%s = ?" % (key,)) @@ -1326,10 +1324,8 @@ class SQLBaseStore(object): sql = "DELETE FROM %s" % table - clauses = [] - values = [] - - add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values) + clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable) + clauses = [clause] for key, value in iteritems(keyvalues): clauses.append("%s = ?" % (key,)) @@ -1698,25 +1694,6 @@ def db_to_json(db_content): raise -def add_in_list_sql_clause( - database_engine, column: str, iterable: Iterable, clauses: List[str], args: List -): - """Adds an SQL clause to the given list of clauses/args that checks the - given column is in the iterable. c.f. `make_in_list_sql_clause` - - Args: - database_engine - column: Name of the column - iterable: The values to check the column against. - clauses: A list to add the expanded clause to - args: A list of arguments that we append the args to. 
- """ - - clause, new_args = make_in_list_sql_clause(database_engine, column, iterable) - clauses.append(clause) - args.extend(new_args) - - def make_in_list_sql_clause( database_engine, column: str, iterable: Iterable ) -> Tuple[str, Iterable]: @@ -1736,7 +1713,7 @@ def make_in_list_sql_clause( A tuple of SQL query and the args """ - if isinstance(database_engine, PostgresEngine): + if database_engine.supports_using_any_list: # This should hopefully be faster, but also makes postgres query # stats easier to understand. return "%s = ANY(?)" % (column,), [list(iterable)] diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 601617b21e..f36600b4bb 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -79,6 +79,12 @@ class PostgresEngine(object): """ return True + @property + def supports_using_any_list(self): + """Do we support using `a = ANY(?)` and passing a list + """ + return True + def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index ac92109366..2526258060 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -46,6 +46,12 @@ class Sqlite3Engine(object): """ return self.module.sqlite_version_info >= (3, 15, 0) + @property + def supports_any_list(self): + """Do we support using `a = ANY(?)` and passing a list + """ + return False + def check_database(self, txn): pass diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 4be6e56dfa..7695bf09fc 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -24,7 +24,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import SynapseError -from synapse.storage._base import add_in_list_sql_clause +from synapse.storage._base import make_in_list_sql_clause from synapse.storage.engines import PostgresEngine, Sqlite3Engine from .background_updates import BackgroundUpdateStore @@ -386,9 +386,10 @@ class SearchStore(SearchBackgroundUpdateStore): # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. if len(room_ids) < 500: - add_in_list_sql_clause( - self.database_engine, "room_id", room_ids, clauses, args + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids ) + clauses = [clause] local_clauses = [] for key in keys: @@ -494,9 +495,10 @@ class SearchStore(SearchBackgroundUpdateStore): # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. 
if len(room_ids) < 500: - add_in_list_sql_clause( - self.database_engine, "room_id", room_ids, clauses, args + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids ) + clauses = [clause] local_clauses = [] for key in keys: -- cgit 1.4.1 From afb6d9d53b417ff3b651767ab88bf63606e7225e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 15:55:41 +0100 Subject: Fix SQLite --- synapse/storage/engines/sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 2526258060..ddad17dc5a 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -47,7 +47,7 @@ class Sqlite3Engine(object): return self.module.sqlite_version_info >= (3, 15, 0) @property - def supports_any_list(self): + def supports_using_any_list(self): """Do we support using `a = ANY(?)` and passing a list """ return False -- cgit 1.4.1 From b54b1e759a9bc6517d5a31e3ea732cecb307d4c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 10 Oct 2019 16:19:40 +0100 Subject: Fix SQLite take 2 --- synapse/storage/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 6176838aa6..f5906fcd54 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1718,4 +1718,4 @@ def make_in_list_sql_clause( # stats easier to understand. return "%s = ANY(?)" % (column,), [list(iterable)] else: - return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), iterable + return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), list(iterable) -- cgit 1.4.1 From 4908fb3b30ac007fda5993521448804067751a6d Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 10 Oct 2019 15:56:00 -0400 Subject: make storage layer in charge of interpreting the device key data --- synapse/handlers/e2e_keys.py | 11 ----------- synapse/storage/end_to_end_keys.py | 11 +++++++++-- tests/storage/test_end_to_end_keys.py | 12 ++++++------ 3 files changed, 15 insertions(+), 19 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 056fb97acb..6708d983ac 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -248,17 +248,6 @@ class E2eKeysHandler(object): results = yield self.store.get_e2e_device_keys(local_query) - # Build the result structure, un-jsonify the results, and add the - # "unsigned" section - for user_id, device_keys in results.items(): - for device_id, device_info in device_keys.items(): - r = dict(device_info["keys"]) - r["unsigned"] = {} - display_name = device_info["device_display_name"] - if display_name is not None: - r["unsigned"]["device_display_name"] = display_name - result_dict[user_id][device_id] = r - log_kv(results) return result_dict diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 33e3a84933..d802d7a485 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -40,7 +40,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): This option only takes effect if include_all_devices is true. Returns: Dict mapping from user-id to dict mapping from device_id to - dict containing "key_json", "device_display_name". + key data. 
""" set_tag("query_list", query_list) if not query_list: @@ -54,9 +54,16 @@ class EndToEndKeyWorkerStore(SQLBaseStore): include_deleted_devices, ) + # Build the result structure, un-jsonify the results, and add the + # "unsigned" section for user_id, device_keys in iteritems(results): for device_id, device_info in iteritems(device_keys): - device_info["keys"] = db_to_json(device_info.pop("key_json")) + r = db_to_json(device_info.pop("key_json")) + r["unsigned"] = {} + display_name = device_info["device_display_name"] + if display_name is not None: + r["unsigned"]["device_display_name"] = display_name + results[user_id][device_id] = r return results diff --git a/tests/storage/test_end_to_end_keys.py b/tests/storage/test_end_to_end_keys.py index c8ece15284..398d546280 100644 --- a/tests/storage/test_end_to_end_keys.py +++ b/tests/storage/test_end_to_end_keys.py @@ -38,7 +38,7 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase): self.assertIn("user", res) self.assertIn("device", res["user"]) dev = res["user"]["device"] - self.assertDictContainsSubset({"keys": json, "device_display_name": None}, dev) + self.assertDictContainsSubset(json, dev) @defer.inlineCallbacks def test_reupload_key(self): @@ -68,7 +68,7 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase): self.assertIn("device", res["user"]) dev = res["user"]["device"] self.assertDictContainsSubset( - {"keys": json, "device_display_name": "display_name"}, dev + {"key": "value", "unsigned": {"device_display_name": "display_name"}}, dev ) @defer.inlineCallbacks @@ -80,10 +80,10 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase): yield self.store.store_device("user2", "device1", None) yield self.store.store_device("user2", "device2", None) - yield self.store.set_e2e_device_keys("user1", "device1", now, "json11") - yield self.store.set_e2e_device_keys("user1", "device2", now, "json12") - yield self.store.set_e2e_device_keys("user2", "device1", now, "json21") - yield self.store.set_e2e_device_keys("user2", "device2", now, "json22") + yield self.store.set_e2e_device_keys("user1", "device1", now, {"key": "json11"}) + yield self.store.set_e2e_device_keys("user1", "device2", now, {"key": "json12"}) + yield self.store.set_e2e_device_keys("user2", "device1", now, {"key": "json21"}) + yield self.store.set_e2e_device_keys("user2", "device2", now, {"key": "json22"}) res = yield self.store.get_e2e_device_keys( (("user1", "device1"), ("user2", "device2")) -- cgit 1.4.1 From 7a0dce92594d05179234095899c3d09a8a744cbb Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 10 Oct 2019 20:31:30 -0400 Subject: make sure we actually return something --- synapse/handlers/e2e_keys.py | 5 +++++ synapse/storage/end_to_end_keys.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 6708d983ac..0a84d0e2b0 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -248,6 +248,11 @@ class E2eKeysHandler(object): results = yield self.store.get_e2e_device_keys(local_query) + # Build the result structure + for user_id, device_keys in results.items(): + for device_id, device_info in device_keys.items(): + result_dict[user_id][device_id] = device_info + log_kv(results) return result_dict diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index d802d7a485..b00a391c82 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -56,16 +56,18 @@ class 
EndToEndKeyWorkerStore(SQLBaseStore): # Build the result structure, un-jsonify the results, and add the # "unsigned" section + rv = {} for user_id, device_keys in iteritems(results): + rv[user_id] = {} for device_id, device_info in iteritems(device_keys): r = db_to_json(device_info.pop("key_json")) r["unsigned"] = {} display_name = device_info["device_display_name"] if display_name is not None: r["unsigned"]["device_display_name"] = display_name - results[user_id][device_id] = r + rv[user_id][device_id] = r - return results + return rv @trace def _get_e2e_device_keys_txn( -- cgit 1.4.1 From a0d0ba7862e38588aa0d0ac29a720fdf06f1ab8d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 11 Oct 2019 09:38:26 +0100 Subject: Fix MAU reaping where reserved users are specified. (#6168) --- changelog.d/6168.bugfix | 1 + synapse/app/homeserver.py | 6 +- synapse/storage/monthly_active_users.py | 101 ++++++++++++++++++----------- tests/storage/test_monthly_active_users.py | 58 ++++++++++++++--- 4 files changed, 115 insertions(+), 51 deletions(-) create mode 100644 changelog.d/6168.bugfix (limited to 'synapse') diff --git a/changelog.d/6168.bugfix b/changelog.d/6168.bugfix new file mode 100644 index 0000000000..39e8e9d019 --- /dev/null +++ b/changelog.d/6168.bugfix @@ -0,0 +1 @@ +Fix monthly active user reaping where reserved users are specified. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 774326dff9..eb54f56853 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -605,13 +605,13 @@ def run(hs): @defer.inlineCallbacks def generate_monthly_active_users(): current_mau_count = 0 - reserved_count = 0 + reserved_users = () store = hs.get_datastore() if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: current_mau_count = yield store.get_monthly_active_count() - reserved_count = yield store.get_registered_reserved_users_count() + reserved_users = yield store.get_registered_reserved_users() current_mau_gauge.set(float(current_mau_count)) - registered_reserved_users_mau_gauge.set(float(reserved_count)) + registered_reserved_users_mau_gauge.set(float(len(reserved_users))) max_mau_gauge.set(float(hs.config.max_mau_value)) def start_generate_monthly_active_users(): diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 752e9788a2..3803604be7 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -32,7 +32,6 @@ class MonthlyActiveUsersStore(SQLBaseStore): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() self.hs = hs - self.reserved_users = () # Do not add more reserved users than the total allowable number self._new_transaction( dbconn, @@ -51,7 +50,6 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn (cursor): threepids (list[dict]): List of threepid dicts to reserve """ - reserved_user_list = [] for tp in threepids: user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"]) @@ -60,10 +58,8 @@ class MonthlyActiveUsersStore(SQLBaseStore): is_support = self.is_support_user_txn(txn, user_id) if not is_support: self.upsert_monthly_active_user_txn(txn, user_id) - reserved_user_list.append(user_id) else: logger.warning("mau limit reserved threepid %s not found in db" % tp) - self.reserved_users = tuple(reserved_user_list) @defer.inlineCallbacks def reap_monthly_active_users(self): @@ -74,8 +70,11 @@ class MonthlyActiveUsersStore(SQLBaseStore): Deferred[] """ - def _reap_users(txn): - # Purge stale users + def 
_reap_users(txn, reserved_users): + """ + Args: + reserved_users (tuple): reserved users to preserve + """ thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) query_args = [thirty_days_ago] @@ -83,20 +82,19 @@ class MonthlyActiveUsersStore(SQLBaseStore): # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres # when len(reserved_users) == 0. Works fine on sqlite. - if len(self.reserved_users) > 0: + if len(reserved_users) > 0: # questionmarks is a hack to overcome sqlite not supporting # tuples in 'WHERE IN %s' - questionmarks = "?" * len(self.reserved_users) + question_marks = ",".join("?" * len(reserved_users)) - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ",".join(questionmarks) - ) + query_args.extend(reserved_users) + sql = base_sql + " AND user_id NOT IN ({})".format(question_marks) else: sql = base_sql txn.execute(sql, query_args) + max_mau_value = self.hs.config.max_mau_value if self.hs.config.limit_usage_by_mau: # If MAU user count still exceeds the MAU threshold, then delete on # a least recently active basis. @@ -106,31 +104,52 @@ class MonthlyActiveUsersStore(SQLBaseStore): # While Postgres does not require 'LIMIT', but also does not support # negative LIMIT values. So there is no way to write it that both can # support - safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) - # Must be greater than zero for postgres - safe_guard = safe_guard if safe_guard > 0 else 0 - query_args = [safe_guard] - - base_sql = """ - DELETE FROM monthly_active_users - WHERE user_id NOT IN ( - SELECT user_id FROM monthly_active_users - ORDER BY timestamp DESC - LIMIT ? + if len(reserved_users) == 0: + sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? ) - """ + """ + txn.execute(sql, (max_mau_value,)) # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres # when len(reserved_users) == 0. Works fine on sqlite. - if len(self.reserved_users) > 0: - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ",".join(questionmarks) - ) else: - sql = base_sql - txn.execute(sql, query_args) + # Must be >= 0 for postgres + num_of_non_reserved_users_to_remove = max( + max_mau_value - len(reserved_users), 0 + ) + + # It is important to filter reserved users twice to guard + # against the case where the reserved user is present in the + # SELECT, meaning that a legitmate mau is deleted. + sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + WHERE user_id NOT IN ({}) + ORDER BY timestamp DESC + LIMIT ? 
+ ) + AND user_id NOT IN ({}) + """.format( + question_marks, question_marks + ) + + query_args = [ + *reserved_users, + num_of_non_reserved_users_to_remove, + *reserved_users, + ] - yield self.runInteraction("reap_monthly_active_users", _reap_users) + txn.execute(sql, query_args) + + reserved_users = yield self.get_registered_reserved_users() + yield self.runInteraction( + "reap_monthly_active_users", _reap_users, reserved_users + ) # It seems poor to invalidate the whole cache, Postgres supports # 'Returning' which would allow me to invalidate only the # specific users, but sqlite has no way to do this and instead @@ -159,21 +178,25 @@ class MonthlyActiveUsersStore(SQLBaseStore): return self.runInteraction("count_users", _count_users) @defer.inlineCallbacks - def get_registered_reserved_users_count(self): - """Of the reserved threepids defined in config, how many are associated + def get_registered_reserved_users(self): + """Of the reserved threepids defined in config, which are associated with registered users? Returns: - Defered[int]: Number of real reserved users + Defered[list]: Real reserved users """ - count = 0 - for tp in self.hs.config.mau_limits_reserved_threepids: + users = [] + + for tp in self.hs.config.mau_limits_reserved_threepids[ + : self.hs.config.max_mau_value + ]: user_id = yield self.hs.get_datastore().get_user_id_by_threepid( tp["medium"], tp["address"] ) if user_id: - count = count + 1 - return count + users.append(user_id) + + return users @defer.inlineCallbacks def upsert_monthly_active_user(self, user_id): diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 1494650d10..90a63dc477 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -50,6 +50,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): {"medium": "email", "address": user2_email}, {"medium": "email", "address": user3_email}, ] + self.hs.config.mau_limits_reserved_threepids = threepids # -1 because user3 is a support user and does not count user_num = len(threepids) - 1 @@ -84,6 +85,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): self.hs.config.max_mau_value = 0 self.reactor.advance(FORTY_DAYS) + self.hs.config.max_mau_value = 5 self.store.reap_monthly_active_users() self.pump() @@ -147,9 +149,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): self.store.reap_monthly_active_users() self.pump() count = self.store.get_monthly_active_count() - self.assertEquals( - self.get_success(count), initial_users - self.hs.config.max_mau_value - ) + self.assertEquals(self.get_success(count), self.hs.config.max_mau_value) self.reactor.advance(FORTY_DAYS) self.store.reap_monthly_active_users() @@ -158,6 +158,44 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): count = self.store.get_monthly_active_count() self.assertEquals(self.get_success(count), 0) + def test_reap_monthly_active_users_reserved_users(self): + """ Tests that reaping correctly handles reaping where reserved users are + present""" + + self.hs.config.max_mau_value = 5 + initial_users = 5 + reserved_user_number = initial_users - 1 + threepids = [] + for i in range(initial_users): + user = "@user%d:server" % i + email = "user%d@example.com" % i + self.get_success(self.store.upsert_monthly_active_user(user)) + threepids.append({"medium": "email", "address": email}) + # Need to ensure that the most recent entries in the + # monthly_active_users table are reserved + now = 
int(self.hs.get_clock().time_msec()) + if i != 0: + self.get_success( + self.store.register_user(user_id=user, password_hash=None) + ) + self.get_success( + self.store.user_add_threepid(user, "email", email, now, now) + ) + + self.hs.config.mau_limits_reserved_threepids = threepids + self.store.runInteraction( + "initialise", self.store._initialise_reserved_users, threepids + ) + count = self.store.get_monthly_active_count() + self.assertTrue(self.get_success(count), initial_users) + + users = self.store.get_registered_reserved_users() + self.assertEquals(len(self.get_success(users)), reserved_user_number) + + self.get_success(self.store.reap_monthly_active_users()) + count = self.store.get_monthly_active_count() + self.assertEquals(self.get_success(count), self.hs.config.max_mau_value) + def test_populate_monthly_users_is_guest(self): # Test that guest users are not added to mau list user_id = "@user_id:host" @@ -192,12 +230,13 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): def test_get_reserved_real_user_account(self): # Test no reserved users, or reserved threepids - count = self.store.get_registered_reserved_users_count() - self.assertEquals(self.get_success(count), 0) + users = self.get_success(self.store.get_registered_reserved_users()) + self.assertEquals(len(users), 0) # Test reserved users but no registered users user1 = "@user1:example.com" user2 = "@user2:example.com" + user1_email = "user1@example.com" user2_email = "user2@example.com" threepids = [ @@ -210,8 +249,8 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): ) self.pump() - count = self.store.get_registered_reserved_users_count() - self.assertEquals(self.get_success(count), 0) + users = self.get_success(self.store.get_registered_reserved_users()) + self.assertEquals(len(users), 0) # Test reserved registed users self.store.register_user(user_id=user1, password_hash=None) @@ -221,8 +260,9 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): now = int(self.hs.get_clock().time_msec()) self.store.user_add_threepid(user1, "email", user1_email, now, now) self.store.user_add_threepid(user2, "email", user2_email, now, now) - count = self.store.get_registered_reserved_users_count() - self.assertEquals(self.get_success(count), len(threepids)) + + users = self.get_success(self.store.get_registered_reserved_users()) + self.assertEquals(len(users), len(threepids)) def test_support_user_not_add_to_mau_limits(self): support_user_id = "@support:test" -- cgit 1.4.1 From f3ceaf432365bce77cff71116fb6c7d38e61c9ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 11:22:36 +0100 Subject: Trace non-JSON APIs, /media, /key etc --- synapse/http/server.py | 2 +- synapse/logging/opentracing.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/http/server.py b/synapse/http/server.py index cb9158fe1b..2ccb210fd6 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -388,7 +388,7 @@ class DirectServeResource(resource.Resource): if not callback: return super().render(request) - resp = callback(request) + resp = trace_servlet(self.__class__.__name__)(callback)(request) # If it's a coroutine, turn it into a Deferred if isinstance(resp, types.CoroutineType): diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index cd1ff6a518..0638cec429 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -169,6 +169,7 @@ import contextlib import inspect import logging 
import re +import types from functools import wraps from typing import Dict @@ -778,8 +779,7 @@ def trace_servlet(servlet_name, extract_context=False): return func @wraps(func) - @defer.inlineCallbacks - def _trace_servlet_inner(request, *args, **kwargs): + async def _trace_servlet_inner(request, *args, **kwargs): request_tags = { "request_id": request.get_request_id(), tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, @@ -796,8 +796,14 @@ def trace_servlet(servlet_name, extract_context=False): scope = start_active_span(servlet_name, tags=request_tags) with scope: - result = yield defer.maybeDeferred(func, request, *args, **kwargs) - return result + result = func(request, *args, **kwargs) + + if not isinstance(result, (types.CoroutineType, defer.Deferred)): + # Some servlets aren't async and just return results + # directly, so we handle that here. + return result + + return await result return _trace_servlet_inner -- cgit 1.4.1 From fca3a541e7e5845d61c519be7223a035374ed698 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 12:05:27 +0100 Subject: Port rest/admin/__init__.py to async/await --- synapse/rest/admin/__init__.py | 127 ++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 72 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 81b6bd8816..f7b9483008 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -23,8 +23,6 @@ import re from six import text_type from six.moves import http_client -from twisted.internet import defer - import synapse from synapse.api.constants import Membership, UserTypes from synapse.api.errors import Codes, NotFoundError, SynapseError @@ -59,15 +57,14 @@ class UsersRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, user_id): + async def on_GET(self, request, user_id): target_user = UserID.from_string(user_id) - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) if not self.hs.is_mine(target_user): raise SynapseError(400, "Can only users a local user") - ret = yield self.handlers.admin_handler.get_users() + ret = await self.handlers.admin_handler.get_users() return 200, ret @@ -122,8 +119,7 @@ class UserRegisterServlet(RestServlet): self.nonces[nonce] = int(self.reactor.seconds()) return 200, {"nonce": nonce} - @defer.inlineCallbacks - def on_POST(self, request): + async def on_POST(self, request): self._clear_old_nonces() if not self.hs.config.registration_shared_secret: @@ -204,14 +200,14 @@ class UserRegisterServlet(RestServlet): register = RegisterRestServlet(self.hs) - user_id = yield register.registration_handler.register_user( + user_id = await register.registration_handler.register_user( localpart=body["username"].lower(), password=body["password"], admin=bool(admin), user_type=user_type, ) - result = yield register._create_registration_details(user_id, body) + result = await register._create_registration_details(user_id, body) return 200, result @@ -223,19 +219,18 @@ class WhoisRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, user_id): + async def on_GET(self, request, user_id): target_user = UserID.from_string(user_id) - requester = yield self.auth.get_user_by_req(request) + requester = await self.auth.get_user_by_req(request) auth_user = requester.user if target_user != auth_user: - yield 
assert_user_is_admin(self.auth, auth_user) + await assert_user_is_admin(self.auth, auth_user) if not self.hs.is_mine(target_user): raise SynapseError(400, "Can only whois a local user") - ret = yield self.handlers.admin_handler.get_whois(target_user) + ret = await self.handlers.admin_handler.get_whois(target_user) return 200, ret @@ -255,9 +250,8 @@ class PurgeHistoryRestServlet(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, room_id, event_id): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request, room_id, event_id): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request, allow_empty_body=True) @@ -270,12 +264,12 @@ class PurgeHistoryRestServlet(RestServlet): event_id = body.get("purge_up_to_event_id") if event_id is not None: - event = yield self.store.get_event(event_id) + event = await self.store.get_event(event_id) if event.room_id != room_id: raise SynapseError(400, "Event is for wrong room.") - token = yield self.store.get_topological_token_for_event(event_id) + token = await self.store.get_topological_token_for_event(event_id) logger.info("[purge] purging up to token %s (event_id %s)", token, event_id) elif "purge_up_to_ts" in body: @@ -285,12 +279,10 @@ class PurgeHistoryRestServlet(RestServlet): 400, "purge_up_to_ts must be an int", errcode=Codes.BAD_JSON ) - stream_ordering = (yield self.store.find_first_stream_ordering_after_ts(ts)) + stream_ordering = await self.store.find_first_stream_ordering_after_ts(ts) - r = ( - yield self.store.get_room_event_after_stream_ordering( - room_id, stream_ordering - ) + r = await self.store.get_room_event_after_stream_ordering( + room_id, stream_ordering ) if not r: logger.warn( @@ -318,7 +310,7 @@ class PurgeHistoryRestServlet(RestServlet): errcode=Codes.BAD_JSON, ) - purge_id = yield self.pagination_handler.start_purge_history( + purge_id = await self.pagination_handler.start_purge_history( room_id, token, delete_local_events=delete_local_events ) @@ -339,9 +331,8 @@ class PurgeHistoryStatusRestServlet(RestServlet): self.pagination_handler = hs.get_pagination_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_GET(self, request, purge_id): - yield assert_requester_is_admin(self.auth, request) + async def on_GET(self, request, purge_id): + await assert_requester_is_admin(self.auth, request) purge_status = self.pagination_handler.get_purge_status(purge_id) if purge_status is None: @@ -357,9 +348,8 @@ class DeactivateAccountRestServlet(RestServlet): self._deactivate_account_handler = hs.get_deactivate_account_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, target_user_id): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request, target_user_id): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request, allow_empty_body=True) erase = body.get("erase", False) if not isinstance(erase, bool): @@ -371,7 +361,7 @@ class DeactivateAccountRestServlet(RestServlet): UserID.from_string(target_user_id) - result = yield self._deactivate_account_handler.deactivate_account( + result = await self._deactivate_account_handler.deactivate_account( target_user_id, erase ) if result: @@ -405,10 +395,9 @@ class ShutdownRoomRestServlet(RestServlet): self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, 
request, room_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_POST(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) content = parse_json_object_from_request(request) assert_params_in_dict(content, ["new_room_user_id"]) @@ -419,7 +408,7 @@ class ShutdownRoomRestServlet(RestServlet): message = content.get("message", self.DEFAULT_MESSAGE) room_name = content.get("room_name", "Content Violation Notification") - info = yield self._room_creation_handler.create_room( + info = await self._room_creation_handler.create_room( room_creator_requester, config={ "preset": "public_chat", @@ -438,9 +427,9 @@ class ShutdownRoomRestServlet(RestServlet): # This will work even if the room is already blocked, but that is # desirable in case the first attempt at blocking the room failed below. - yield self.store.block_room(room_id, requester_user_id) + await self.store.block_room(room_id, requester_user_id) - users = yield self.state.get_current_users_in_room(room_id) + users = await self.state.get_current_users_in_room(room_id) kicked_users = [] failed_to_kick_users = [] for user_id in users: @@ -451,7 +440,7 @@ class ShutdownRoomRestServlet(RestServlet): try: target_requester = create_requester(user_id) - yield self.room_member_handler.update_membership( + await self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=room_id, @@ -461,9 +450,9 @@ class ShutdownRoomRestServlet(RestServlet): require_consent=False, ) - yield self.room_member_handler.forget(target_requester.user, room_id) + await self.room_member_handler.forget(target_requester.user, room_id) - yield self.room_member_handler.update_membership( + await self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=new_room_id, @@ -480,7 +469,7 @@ class ShutdownRoomRestServlet(RestServlet): ) failed_to_kick_users.append(user_id) - yield self.event_creation_handler.create_and_send_nonmember_event( + await self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, { "type": "m.room.message", @@ -491,9 +480,9 @@ class ShutdownRoomRestServlet(RestServlet): ratelimit=False, ) - aliases_for_room = yield self.store.get_aliases_for_room(room_id) + aliases_for_room = await self.store.get_aliases_for_room(room_id) - yield self.store.update_aliases_for_room( + await self.store.update_aliases_for_room( room_id, new_room_id, requester_user_id ) @@ -532,13 +521,12 @@ class ResetPasswordRestServlet(RestServlet): self.auth = hs.get_auth() self._set_password_handler = hs.get_set_password_handler() - @defer.inlineCallbacks - def on_POST(self, request, target_user_id): + async def on_POST(self, request, target_user_id): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. 
""" - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) UserID.from_string(target_user_id) @@ -546,7 +534,7 @@ class ResetPasswordRestServlet(RestServlet): assert_params_in_dict(params, ["new_password"]) new_password = params["new_password"] - yield self._set_password_handler.set_password( + await self._set_password_handler.set_password( target_user_id, new_password, requester ) return 200, {} @@ -572,12 +560,11 @@ class GetUsersPaginatedRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, target_user_id): + async def on_GET(self, request, target_user_id): """Get request to get specific number of users from Synapse. This needs user to have administrator access in Synapse. """ - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(target_user_id) @@ -590,11 +577,10 @@ class GetUsersPaginatedRestServlet(RestServlet): logger.info("limit: %s, start: %s", limit, start) - ret = yield self.handlers.admin_handler.get_users_paginate(order, start, limit) + ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) return 200, ret - @defer.inlineCallbacks - def on_POST(self, request, target_user_id): + async def on_POST(self, request, target_user_id): """Post request to get specific number of users from Synapse.. This needs user to have administrator access in Synapse. Example: @@ -608,7 +594,7 @@ class GetUsersPaginatedRestServlet(RestServlet): Returns: 200 OK with json object {list[dict[str, Any]], count} or empty object. """ - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) UserID.from_string(target_user_id) order = "name" # order by name in user table @@ -618,7 +604,7 @@ class GetUsersPaginatedRestServlet(RestServlet): start = params["start"] logger.info("limit: %s, start: %s", limit, start) - ret = yield self.handlers.admin_handler.get_users_paginate(order, start, limit) + ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) return 200, ret @@ -641,13 +627,12 @@ class SearchUsersRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, target_user_id): + async def on_GET(self, request, target_user_id): """Get request to search user table for specific users according to search term. This needs user to have a administrator access in Synapse. 
""" - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(target_user_id) @@ -661,7 +646,7 @@ class SearchUsersRestServlet(RestServlet): term = parse_string(request, "term", required=True) logger.info("term: %s ", term) - ret = yield self.handlers.admin_handler.search_users(term) + ret = await self.handlers.admin_handler.search_users(term) return 200, ret @@ -676,15 +661,14 @@ class DeleteGroupAdminRestServlet(RestServlet): self.is_mine_id = hs.is_mine_id self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, group_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_POST(self, request, group_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) if not self.is_mine_id(group_id): raise SynapseError(400, "Can only delete local groups") - yield self.group_server.delete_group(group_id, requester.user.to_string()) + await self.group_server.delete_group(group_id, requester.user.to_string()) return 200, {} @@ -700,16 +684,15 @@ class AccountValidityRenewServlet(RestServlet): self.account_activity_handler = hs.get_account_validity_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request) if "user_id" not in body: raise SynapseError(400, "Missing property 'user_id' in the request body") - expiration_ts = yield self.account_activity_handler.renew_account_for_user( + expiration_ts = await self.account_activity_handler.renew_account_for_user( body["user_id"], body.get("expiration_ts"), not body.get("enable_renewal_emails", True), -- cgit 1.4.1 From 281f887090d38f14df9b011ad1e5379a25be9c3e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 12:17:59 +0100 Subject: Port synaps/rest/client/media.py to async/await --- synapse/rest/admin/media.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index ed7086d09c..fa833e54cf 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -16,8 +16,6 @@ import logging -from twisted.internet import defer - from synapse.api.errors import AuthError from synapse.http.servlet import RestServlet, parse_integer from synapse.rest.admin._base import ( @@ -40,12 +38,11 @@ class QuarantineMediaInRoom(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_POST(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) - num_quarantined = yield self.store.quarantine_media_ids_in_room( + num_quarantined = await self.store.quarantine_media_ids_in_room( room_id, requester.user.to_string() ) @@ -62,14 +59,13 @@ class ListMediaInRoom(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_GET(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - is_admin = yield 
self.auth.is_server_admin(requester.user) + async def on_GET(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + is_admin = await self.auth.is_server_admin(requester.user) if not is_admin: raise AuthError(403, "You are not a server admin") - local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + local_mxcs, remote_mxcs = await self.store.get_media_mxcs_in_room(room_id) return 200, {"local": local_mxcs, "remote": remote_mxcs} @@ -81,14 +77,13 @@ class PurgeMediaCacheRestServlet(RestServlet): self.media_repository = hs.get_media_repository() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request): + await assert_requester_is_admin(self.auth, request) before_ts = parse_integer(request, "before_ts", required=True) logger.info("before_ts: %r", before_ts) - ret = yield self.media_repository.delete_old_remote_media(before_ts) + ret = await self.media_repository.delete_old_remote_media(before_ts) return 200, ret -- cgit 1.4.1 From f95325e22a829fa94e0e4f3e6cc832c799324cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 12:20:24 +0100 Subject: Port synaps/rest/client/server_notice_servlet.py to async/await --- synapse/rest/admin/server_notice_servlet.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/server_notice_servlet.py b/synapse/rest/admin/server_notice_servlet.py index ae2cbe2e0a..6e9a874121 100644 --- a/synapse/rest/admin/server_notice_servlet.py +++ b/synapse/rest/admin/server_notice_servlet.py @@ -14,8 +14,6 @@ # limitations under the License. import re -from twisted.internet import defer - from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.http.servlet import ( @@ -69,9 +67,8 @@ class SendServerNoticeServlet(RestServlet): self.__class__.__name__, ) - @defer.inlineCallbacks - def on_POST(self, request, txn_id=None): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request, txn_id=None): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request) assert_params_in_dict(body, ("user_id", "content")) event_type = body.get("type", EventTypes.Message) @@ -85,7 +82,7 @@ class SendServerNoticeServlet(RestServlet): if not self.hs.is_mine_id(user_id): raise SynapseError(400, "Server notices can only be sent to local users") - event = yield self.snm.send_notice( + event = await self.snm.send_notice( user_id=body["user_id"], type=event_type, state_key=state_key, -- cgit 1.4.1 From dfbb62c28de8e6484fcc5ffdc89a5d62ae40c983 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 12:20:29 +0100 Subject: Port synaps/rest/client/users.py to async/await --- synapse/rest/admin/users.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 9720a3bab0..d5d124a0dc 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -14,8 +14,6 @@ # limitations under the License. 
import re -from twisted.internet import defer - from synapse.api.errors import SynapseError from synapse.http.servlet import ( RestServlet, @@ -59,24 +57,22 @@ class UserAdminServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, user_id): - yield assert_requester_is_admin(self.auth, request) + async def on_GET(self, request, user_id): + await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): raise SynapseError(400, "Only local users can be admins of this homeserver") - is_admin = yield self.handlers.admin_handler.get_user_server_admin(target_user) + is_admin = await self.handlers.admin_handler.get_user_server_admin(target_user) is_admin = bool(is_admin) return 200, {"admin": is_admin} - @defer.inlineCallbacks - def on_PUT(self, request, user_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_PUT(self, request, user_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) auth_user = requester.user target_user = UserID.from_string(user_id) @@ -93,7 +89,7 @@ class UserAdminServlet(RestServlet): if target_user == auth_user and not set_admin_to: raise SynapseError(400, "You may not demote yourself.") - yield self.handlers.admin_handler.set_user_server_admin( + await self.handlers.admin_handler.set_user_server_admin( target_user, set_admin_to ) -- cgit 1.4.1 From be9b55e0d2b758bd7d9be4273253ea115c5362a3 Mon Sep 17 00:00:00 2001 From: Valérian Rousset Date: Fri, 11 Oct 2019 13:33:12 +0200 Subject: cas: support setting display name (#6114) Now, the CAS server can return an attribute stating what's the desired displayname, instead of using the username directly. --- changelog.d/6114.feature | 1 + docs/sample_config.yaml | 1 + synapse/config/cas.py | 3 +++ synapse/rest/client/v1/login.py | 4 +++- 4 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6114.feature (limited to 'synapse') diff --git a/changelog.d/6114.feature b/changelog.d/6114.feature new file mode 100644 index 0000000000..a34ab12148 --- /dev/null +++ b/changelog.d/6114.feature @@ -0,0 +1 @@ +CAS login now provides a default display name for users if a `displayname_attribute` is set in the configuration file. 
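
The change in this patch reduces to reading one extra, configurable attribute out of the CAS validation response and handing it to the SSO handler as a default display name. The snippet below is a minimal, self-contained sketch of that idea; it is not Synapse's actual parse_cas_response, and the XML handling and the default attribute name "name" are assumptions made purely for illustration.

    import xml.etree.ElementTree as ET

    def parse_cas_user(cas_response_body, displayname_attribute="name"):
        """Return (username, display_name) from a CAS validation response body.

        Sketch only: real CAS responses carry namespaced elements such as
        <cas:user> and <cas:attributes>; here we just match on local tag names.
        """
        user = None
        attributes = {}
        for elem in ET.fromstring(cas_response_body).iter():
            tag = elem.tag.split("}")[-1]  # drop any XML namespace prefix
            text = (elem.text or "").strip()
            if tag == "user":
                user = text
            elif text and tag not in ("serviceResponse", "authenticationSuccess", "attributes"):
                attributes[tag] = text
        return user, attributes.get(displayname_attribute)

With displayname_attribute set to "name" in the CAS config block, a response carrying <cas:name>Alice</cas:name> for user "alice" would yield ("alice", "Alice"); when the attribute is absent the display name is simply omitted.
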
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 43893399ad..8226978ba6 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1220,6 +1220,7 @@ saml2_config: # enabled: true # server_url: "https://cas-server.com" # service_url: "https://homeserver.domain.com:8448" +# #displayname_attribute: name # #required_attributes: # # name: value diff --git a/synapse/config/cas.py b/synapse/config/cas.py index b916c3aa66..4526c1a67b 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py @@ -30,11 +30,13 @@ class CasConfig(Config): self.cas_enabled = cas_config.get("enabled", True) self.cas_server_url = cas_config["server_url"] self.cas_service_url = cas_config["service_url"] + self.cas_displayname_attribute = cas_config.get("displayname_attribute") self.cas_required_attributes = cas_config.get("required_attributes", {}) else: self.cas_enabled = False self.cas_server_url = None self.cas_service_url = None + self.cas_displayname_attribute = None self.cas_required_attributes = {} def generate_config_section(self, config_dir_path, server_name, **kwargs): @@ -45,6 +47,7 @@ class CasConfig(Config): # enabled: true # server_url: "https://cas-server.com" # service_url: "https://homeserver.domain.com:8448" + # #displayname_attribute: name # #required_attributes: # # name: value """ diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 9cddbc752a..8414af08cb 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -377,6 +377,7 @@ class CasTicketServlet(RestServlet): super(CasTicketServlet, self).__init__() self.cas_server_url = hs.config.cas_server_url self.cas_service_url = hs.config.cas_service_url + self.cas_displayname_attribute = hs.config.cas_displayname_attribute self.cas_required_attributes = hs.config.cas_required_attributes self._sso_auth_handler = SSOAuthHandler(hs) self._http_client = hs.get_simple_http_client() @@ -400,6 +401,7 @@ class CasTicketServlet(RestServlet): def handle_cas_response(self, request, cas_response_body, client_redirect_url): user, attributes = self.parse_cas_response(cas_response_body) + displayname = attributes.pop(self.cas_displayname_attribute, None) for required_attribute, required_value in self.cas_required_attributes.items(): # If required attribute was not in CAS Response - Forbidden @@ -414,7 +416,7 @@ class CasTicketServlet(RestServlet): raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED) return self._sso_auth_handler.on_successful_auth( - user, request, client_redirect_url + user, request, client_redirect_url, displayname ) def parse_cas_response(self, cas_response_body): -- cgit 1.4.1 From 3c2d6c708cd93df7fc945e10014049e9f9b36f46 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 15:26:09 +0100 Subject: Add maybe_awaitable and fix __init__ bugs --- synapse/rest/admin/__init__.py | 7 +++++-- synapse/util/async_helpers.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index f7b9483008..939418ee2b 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -44,6 +44,7 @@ from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.rest.admin.users import UserAdminServlet from synapse.types import UserID, create_requester +from synapse.util.async_helpers import maybe_awaitable from 
synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) @@ -310,7 +311,7 @@ class PurgeHistoryRestServlet(RestServlet): errcode=Codes.BAD_JSON, ) - purge_id = await self.pagination_handler.start_purge_history( + purge_id = self.pagination_handler.start_purge_history( room_id, token, delete_local_events=delete_local_events ) @@ -480,7 +481,9 @@ class ShutdownRoomRestServlet(RestServlet): ratelimit=False, ) - aliases_for_room = await self.store.get_aliases_for_room(room_id) + aliases_for_room = await maybe_awaitable( + self.store.get_aliases_for_room(room_id) + ) await self.store.update_aliases_for_room( room_id, new_room_id, requester_user_id diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 0d3bdd88ce..804dbca443 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -21,6 +21,8 @@ from typing import Dict, Sequence, Set, Union from six.moves import range +import attr + from twisted.internet import defer from twisted.internet.defer import CancelledError from twisted.python import failure @@ -483,3 +485,30 @@ def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None): deferred.addCallbacks(success_cb, failure_cb) return new_d + + +@attr.s(slots=True, frozen=True) +class DoneAwaitable(object): + """Simple awaitable that returns the provided value. + """ + + value = attr.ib() + + def __await__(self): + return self + + def __iter__(self): + return self + + def __next__(self): + raise StopIteration(self.value) + + +def maybe_awaitable(value): + """Convert a value to an awaitable if not already an awaitable. + """ + + if hasattr(value, "__await__"): + return value + + return DoneAwaitable(value) -- cgit 1.4.1 From 2e97a4c1978639c0beffd62417e51724ae2c77db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Oct 2019 15:29:26 +0100 Subject: Port synapse/rest/client/_base.py to async/await --- synapse/rest/admin/_base.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py index 5a9b08d3ef..afd0647205 100644 --- a/synapse/rest/admin/_base.py +++ b/synapse/rest/admin/_base.py @@ -15,8 +15,6 @@ import re -from twisted.internet import defer - from synapse.api.errors import AuthError @@ -42,8 +40,7 @@ def historical_admin_path_patterns(path_regex): ) -@defer.inlineCallbacks -def assert_requester_is_admin(auth, request): +async def assert_requester_is_admin(auth, request): """Verify that the requester is an admin user WARNING: MAKE SURE YOU YIELD ON THE RESULT! @@ -58,12 +55,11 @@ def assert_requester_is_admin(auth, request): Raises: AuthError if the requester is not an admin """ - requester = yield auth.get_user_by_req(request) - yield assert_user_is_admin(auth, requester.user) + requester = await auth.get_user_by_req(request) + await assert_user_is_admin(auth, requester.user) -@defer.inlineCallbacks -def assert_user_is_admin(auth, user_id): +async def assert_user_is_admin(auth, user_id): """Verify that the given user is an admin user WARNING: MAKE SURE YOU YIELD ON THE RESULT! 
@@ -79,6 +75,6 @@ def assert_user_is_admin(auth, user_id): AuthError if the user is not an admin """ - is_admin = yield auth.is_server_admin(user_id) + is_admin = await auth.is_server_admin(user_id) if not is_admin: raise AuthError(403, "You are not a server admin") -- cgit 1.4.1 From 132b251e2963b0e509afe00796f1b227e567b989 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Fri, 11 Oct 2019 14:24:52 -0400 Subject: expand on comment --- synapse/storage/end_to_end_keys.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index b00a391c82..872bc75490 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -40,7 +40,8 @@ class EndToEndKeyWorkerStore(SQLBaseStore): This option only takes effect if include_all_devices is true. Returns: Dict mapping from user-id to dict mapping from device_id to - key data. + key data. The key data will be a dict in the same format as the + DeviceKeys type returned by POST /_matrix/client/r0/keys/query. """ set_tag("query_list", query_list) if not query_list: -- cgit 1.4.1 From a2bb50c2eb431414d999ec682b236620528b00e1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Oct 2019 15:39:13 +0100 Subject: Merge pull request #6185 from matrix-org/erikj/fix_censored_evnets Fix inserting bytes as text in `censor_redactions` --- changelog.d/6185.bugfix | 1 + synapse/storage/events.py | 6 ++--- .../56/redaction_censor3_fix_update.sql.postgres | 26 ++++++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6185.bugfix create mode 100644 synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres (limited to 'synapse') diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix new file mode 100644 index 0000000000..9d1c669b88 --- /dev/null +++ b/changelog.d/6185.bugfix @@ -0,0 +1 @@ +Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2e485c8644..bb6ff0595a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -23,7 +23,7 @@ from functools import wraps from six import iteritems, text_type from six.moves import range -from canonicaljson import encode_canonical_json, json +from canonicaljson import json from prometheus_client import Counter, Histogram from twisted.internet import defer @@ -1632,9 +1632,7 @@ class EventsStore( and original_event.internal_metadata.is_redacted() ): # Redaction was allowed - pruned_json = encode_canonical_json( - prune_event_dict(original_event.get_dict()) - ) + pruned_json = encode_json(prune_event_dict(original_event.get_dict())) else: # Redaction wasn't allowed pruned_json = None diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres new file mode 100644 index 0000000000..f7bcc5e2f2 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres @@ -0,0 +1,26 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- There was a bug where we may have updated censored redactions as bytes, +-- which can (somehow) cause json to be inserted hex encoded. This goes and +-- undoes any such hex encoded JSON. +UPDATE event_json SET json = convert_from(json::bytea, 'utf8') +WHERE event_id IN ( + SELECT event_json.event_id + FROM event_json + INNER JOIN redactions ON (event_json.event_id = redacts) + WHERE have_censored AND json NOT LIKE '{%' +); -- cgit 1.4.1 From 5b0e9948eaae801643e594b5abc8ee4b10bd194e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Oct 2019 16:03:24 +0100 Subject: Do the update as a background index --- synapse/storage/events_bg_updates.py | 43 ++++++++++++++++++++++ .../56/redaction_censor3_fix_update.sql.postgres | 17 ++++----- 2 files changed, 51 insertions(+), 9 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 5717baf48c..e77a7e28af 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -71,6 +71,19 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "redactions_received_ts", self._redactions_received_ts ) + # This index gets deleted in `event_fix_redactions_bytes` update + self.register_background_index_update( + "event_fix_redactions_bytes_create_index", + index_name="redactions_censored_redacts", + table="redactions", + columns=["redacts"], + where_clause="have_censored", + ) + + self.register_background_update_handler( + "event_fix_redactions_bytes", self._event_fix_redactions_bytes + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -458,3 +471,33 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): yield self._end_background_update("redactions_received_ts") return count + + @defer.inlineCallbacks + def _event_fix_redactions_bytes(self, progress, batch_size): + """Undoes hex encoded censored redacted event JSON. + """ + + def _event_fix_redactions_bytes_txn(txn): + # This update is quite fast due to new index. + txn.execute( + """ + UPDATE event_json + SET + json = convert_from(json::bytea, 'utf8') + FROM redactions + WHERE + redactions.have_censored + AND event_json.event_id = redactions.redacts + AND json NOT LIKE '{%'; + """ + ) + + txn.execute("DROP INDEX redactions_censored_redacts") + + yield self.runInteraction( + "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn + ) + + yield self._end_background_update("event_fix_redactions_bytes") + + return 1 diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres index f7bcc5e2f2..67471f3ef5 100644 --- a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres +++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres @@ -15,12 +15,11 @@ -- There was a bug where we may have updated censored redactions as bytes, --- which can (somehow) cause json to be inserted hex encoded. 
This goes and --- undoes any such hex encoded JSON. -UPDATE event_json SET json = convert_from(json::bytea, 'utf8') -WHERE event_id IN ( - SELECT event_json.event_id - FROM event_json - INNER JOIN redactions ON (event_json.event_id = redacts) - WHERE have_censored AND json NOT LIKE '{%' -); +-- which can (somehow) cause json to be inserted hex encoded. These updates go +-- and undoes any such hex encoded JSON. + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_fix_redactions_bytes_create_index', '{}'); + +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index'); -- cgit 1.4.1 From 71cd3fed669a55e6ef000591ca89fe01b37a5ee1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 17 Oct 2019 16:40:56 +0100 Subject: 1.4.1rc1 --- CHANGES.md | 8 ++++++++ changelog.d/6185.bugfix | 1 - synapse/__init__.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) delete mode 100644 changelog.d/6185.bugfix (limited to 'synapse') diff --git a/CHANGES.md b/CHANGES.md index 165e1d4db4..ecba33bd30 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +Synapse 1.4.1rc1 (2019-10-17) +============================= + +Bugfixes +-------- + +- Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events. ([\#6185](https://github.com/matrix-org/synapse/issues/6185), [5b0e9948](https://github.com/matrix-org/synapse/commit/5b0e9948eaae801643e594b5abc8ee4b10bd194e)) + Synapse 1.4.0 (2019-10-03) ========================== diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix deleted file mode 100644 index 9d1c669b88..0000000000 --- a/changelog.d/6185.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events. diff --git a/synapse/__init__.py b/synapse/__init__.py index 2d52d26af5..2fc0c3cf85 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.4.0" +__version__ = "1.4.1rc1" -- cgit 1.4.1 From 1ba359a11f238fa8d9b6319067d1b0acefdba20a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 11 Oct 2019 14:50:48 +0100 Subject: rip out some unreachable code The only possible rejection reason is AUTH_ERROR, so all of this is unreachable. 
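
The claim above can be checked mechanically: the only value ever written as a rejection reason anywhere in the codebase is AUTH_ERROR, so read-side branches keyed on REPLACED or NOT_ANCESTOR can never fire. A schematic reduction of that argument (not the literal Synapse code) looks like this:

    AUTH_ERROR = "auth_error"

    def mark_event_rejected(reasons, event_id):
        # The write path only ever stores AUTH_ERROR.
        reasons[event_id] = AUTH_ERROR

    def handle_rejection(reasons, event_id):
        reason = reasons.get(event_id)
        if reason == "replaced":       # dead branch: never written
            raise AssertionError("unreachable")
        if reason == "not_ancestor":   # dead branch: never written
            raise AssertionError("unreachable")
        return reason                  # always AUTH_ERROR (or None)

Deleting the dead branches, the query_auth federation round-trip they guarded, and the unused RejectedReason constants is therefore behaviour-preserving.
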
--- synapse/api/constants.py | 2 - synapse/federation/federation_client.py | 38 ------------ synapse/federation/transport/client.py | 11 ---- synapse/handlers/federation.py | 102 -------------------------------- 4 files changed, 153 deletions(-) (limited to 'synapse') diff --git a/synapse/api/constants.py b/synapse/api/constants.py index f29bce560c..60e99e4663 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -97,8 +97,6 @@ class EventTypes(object): class RejectedReason(object): AUTH_ERROR = "auth_error" - REPLACED = "replaced" - NOT_ANCESTOR = "not_ancestor" class RoomCreationPreset(object): diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6ee6216660..5b22a39b7f 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -878,44 +878,6 @@ class FederationClient(FederationBase): third_party_instance_id=third_party_instance_id, ) - @defer.inlineCallbacks - def query_auth(self, destination, room_id, event_id, local_auth): - """ - Params: - destination (str) - event_it (str) - local_auth (list) - """ - time_now = self._clock.time_msec() - - send_content = {"auth_chain": [e.get_pdu_json(time_now) for e in local_auth]} - - code, content = yield self.transport_layer.send_query_auth( - destination=destination, - room_id=room_id, - event_id=event_id, - content=send_content, - ) - - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - - auth_chain = [event_from_pdu_json(e, format_ver) for e in content["auth_chain"]] - - signed_auth = yield self._check_sigs_and_hash_and_fetch( - destination, auth_chain, outlier=True, room_version=room_version - ) - - signed_auth.sort(key=lambda e: e.depth) - - ret = { - "auth_chain": signed_auth, - "rejects": content.get("rejects", []), - "missing": content.get("missing", []), - } - - return ret - @defer.inlineCallbacks def get_missing_events( self, diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 482a101c09..7b18408144 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -381,17 +381,6 @@ class TransportLayerClient(object): return content - @defer.inlineCallbacks - @log_function - def send_query_auth(self, destination, room_id, event_id, content): - path = _create_v1_path("/query_auth/%s/%s", room_id, event_id) - - content = yield self.client.post_json( - destination=destination, path=path, data=content - ) - - return content - @defer.inlineCallbacks @log_function def query_client_keys(self, destination, query_content, timeout): diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 50fc0fde2a..57f661f16e 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2181,103 +2181,10 @@ class FederationHandler(BaseHandler): auth_events.update(new_state) - different_auth = event_auth_events.difference( - e.event_id for e in auth_events.values() - ) - yield self._update_context_for_auth_events( event, context, auth_events, event_key ) - if not different_auth: - # we're done - return - - logger.info( - "auth_events still refers to events which are not in the calculated auth " - "chain after state resolution: %s", - different_auth, - ) - - # Only do auth resolution if we have something new to say. - # We can't prove an auth failure. 
- do_resolution = False - - for e_id in different_auth: - if e_id in have_events: - if have_events[e_id] == RejectedReason.NOT_ANCESTOR: - do_resolution = True - break - - if not do_resolution: - logger.info( - "Skipping auth resolution due to lack of provable rejection reasons" - ) - return - - logger.info("Doing auth resolution") - - prev_state_ids = yield context.get_prev_state_ids(self.store) - - # 1. Get what we think is the auth chain. - auth_ids = yield self.auth.compute_auth_events(event, prev_state_ids) - local_auth_chain = yield self.store.get_auth_chain(auth_ids, include_given=True) - - try: - # 2. Get remote difference. - try: - result = yield self.federation_client.query_auth( - origin, event.room_id, event.event_id, local_auth_chain - ) - except RequestSendFailed as e: - # The other side isn't around or doesn't implement the - # endpoint, so lets just bail out. - logger.info("Failed to query auth from remote: %s", e) - return - - seen_remotes = yield self.store.have_seen_events( - [e.event_id for e in result["auth_chain"]] - ) - - # 3. Process any remote auth chain events we haven't seen. - for ev in result["auth_chain"]: - if ev.event_id in seen_remotes: - continue - - if ev.event_id == event.event_id: - continue - - try: - auth_ids = ev.auth_event_ids() - auth = { - (e.type, e.state_key): e - for e in result["auth_chain"] - if e.event_id in auth_ids or event.type == EventTypes.Create - } - ev.internal_metadata.outlier = True - - logger.debug( - "do_auth %s different_auth: %s", event.event_id, e.event_id - ) - - yield self._handle_new_event(origin, ev, auth_events=auth) - - if ev.event_id in event_auth_events: - auth_events[(ev.type, ev.state_key)] = ev - except AuthError: - pass - - except Exception: - # FIXME: - logger.exception("Failed to query auth chain") - - # 4. Look at rejects and their proofs. - # TODO. - - yield self._update_context_for_auth_events( - event, context, auth_events, event_key - ) - @defer.inlineCallbacks def _update_context_for_auth_events(self, event, context, auth_events, event_key): """Update the state_ids in an event context after auth event resolution, @@ -2444,15 +2351,6 @@ class FederationHandler(BaseHandler): reason_map[e.event_id] = reason - if reason == RejectedReason.AUTH_ERROR: - pass - elif reason == RejectedReason.REPLACED: - # TODO: Get proof - pass - elif reason == RejectedReason.NOT_ANCESTOR: - # TODO: Get proof. - pass - logger.debug("construct_auth_difference returning") return { -- cgit 1.4.1 From 5859a5c569c03f3b7c578fe4dbf2274e37af03bb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 18 Oct 2019 07:42:26 +0200 Subject: Fix presence timeouts when synchrotron restarts. (#6212) * Fix presence timeouts when synchrotron restarts. Handling timeouts would fail if there was an external process that had timed out, e.g. a synchrotron restarting. This was due to a couple of variable name typoes. Fixes #3715. --- changelog.d/6212.bugfix | 1 + synapse/handlers/presence.py | 13 +++++++++---- tests/handlers/test_presence.py | 39 +++++++++++++++++++++++++++++++++++++++ tox.ini | 2 +- 4 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 changelog.d/6212.bugfix (limited to 'synapse') diff --git a/changelog.d/6212.bugfix b/changelog.d/6212.bugfix new file mode 100644 index 0000000000..918755fee0 --- /dev/null +++ b/changelog.d/6212.bugfix @@ -0,0 +1 @@ +Fix bug where presence would not get timed out correctly if a synchrotron worker is used and restarted. 
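As a minimal sketch of the bug being fixed (simplified, self-contained code; the real handler is in the presence.py hunk that follows, and EXPIRY_MS here is an illustrative value rather than the real EXTERNAL_PROCESS_EXPIRY constant): the handler keeps two per-process maps, one holding the set of user IDs each external process is syncing for and one holding that process's last-update timestamp. The expiry loop must collect users from the syncs map and then drop both entries; the old code popped the timestamp map twice, once under a misspelled name.

from typing import Dict, Set

EXPIRY_MS = 5 * 60 * 1000  # illustrative expiry period


class ExternalSyncExpirySketch:
    def __init__(self) -> None:
        # process_id -> user IDs currently syncing via that process
        self.external_process_to_current_syncs: Dict[int, Set[str]] = {}
        # process_id -> last time that process checked in, in ms
        self.external_process_last_updated_ms: Dict[int, int] = {}

    def handle_timeouts(self, now_ms: int) -> Set[str]:
        users_to_check: Set[str] = set()
        expired = [
            pid
            for pid, last in self.external_process_last_updated_ms.items()
            if now_ms - last > EXPIRY_MS
        ]
        for pid in expired:
            # Users syncing via the expired process need a presence timeout
            # check, and both bookkeeping entries for the process are dropped.
            users_to_check.update(
                self.external_process_to_current_syncs.pop(pid, set())
            )
            self.external_process_last_updated_ms.pop(pid)
        return users_to_check
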
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 2a5f1a007d..eda15bc623 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -24,6 +24,7 @@ The methods that define policy are: import logging from contextlib import contextmanager +from typing import Dict, Set from six import iteritems, itervalues @@ -179,8 +180,9 @@ class PresenceHandler(object): # we assume that all the sync requests on that process have stopped. # Stored as a dict from process_id to set of user_id, and a dict of # process_id to millisecond timestamp last updated. - self.external_process_to_current_syncs = {} - self.external_process_last_updated_ms = {} + self.external_process_to_current_syncs = {} # type: Dict[int, Set[str]] + self.external_process_last_updated_ms = {} # type: Dict[int, int] + self.external_sync_linearizer = Linearizer(name="external_sync_linearizer") # Start a LoopingCall in 30s that fires every 5s. @@ -349,10 +351,13 @@ class PresenceHandler(object): if now - last_update > EXTERNAL_PROCESS_EXPIRY ] for process_id in expired_process_ids: + # For each expired process drop tracking info and check the users + # that were syncing on that process to see if they need to be timed + # out. users_to_check.update( - self.external_process_last_updated_ms.pop(process_id, ()) + self.external_process_to_current_syncs.pop(process_id, ()) ) - self.external_process_last_update.pop(process_id) + self.external_process_last_updated_ms.pop(process_id) states = [ self.user_to_current_state.get(user_id, UserPresenceState.default(user_id)) diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index f70c6e7d65..d4293b4312 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -22,6 +22,7 @@ from synapse.api.constants import EventTypes, Membership, PresenceState from synapse.events import room_version_to_event_format from synapse.events.builder import EventBuilder from synapse.handlers.presence import ( + EXTERNAL_PROCESS_EXPIRY, FEDERATION_PING_INTERVAL, FEDERATION_TIMEOUT, IDLE_TIMER, @@ -413,6 +414,44 @@ class PresenceTimeoutTestCase(unittest.TestCase): self.assertEquals(state, new_state) +class PresenceHandlerTestCase(unittest.HomeserverTestCase): + def prepare(self, reactor, clock, hs): + self.presence_handler = hs.get_presence_handler() + self.clock = hs.get_clock() + + def test_external_process_timeout(self): + """Test that if an external process doesn't update the records for a while + we time out their syncing users presence. + """ + process_id = 1 + user_id = "@test:server" + + # Notify handler that a user is now syncing. + self.get_success( + self.presence_handler.update_external_syncs_row( + process_id, user_id, True, self.clock.time_msec() + ) + ) + + # Check that if we wait a while without telling the handler the user has + # stopped syncing that their presence state doesn't get timed out. 
+ self.reactor.advance(EXTERNAL_PROCESS_EXPIRY / 2) + + state = self.get_success( + self.presence_handler.get_state(UserID.from_string(user_id)) + ) + self.assertEqual(state.state, PresenceState.ONLINE) + + # Check that if the external process timeout fires, then the syncing + # user gets timed out + self.reactor.advance(EXTERNAL_PROCESS_EXPIRY) + + state = self.get_success( + self.presence_handler.get_state(UserID.from_string(user_id)) + ) + self.assertEqual(state.state, PresenceState.OFFLINE) + + class PresenceJoinTestCase(unittest.HomeserverTestCase): """Tests remote servers get told about presence of users in the room when they join and when new local users join. diff --git a/tox.ini b/tox.ini index 367cc2ccf2..7ba6f6339f 100644 --- a/tox.ini +++ b/tox.ini @@ -161,7 +161,7 @@ basepython = python3.7 skip_install = True deps = {[base]deps} - mypy + mypy==0.730 mypy-zope env = MYPYPATH = stubs/ -- cgit 1.4.1 From 41b9faed16a2721540042d09b90a24721d18a8e9 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 18 Oct 2019 10:15:12 +0100 Subject: 1.4.1 --- CHANGES.md | 6 ++++++ debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/CHANGES.md b/CHANGES.md index ecba33bd30..7e92c3bf70 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 1.4.1 (2019-10-18) +========================== + +No changes since 1.4.1rc1 + + Synapse 1.4.1rc1 (2019-10-17) ============================= diff --git a/debian/changelog b/debian/changelog index 60c682cc57..02f2b508c2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.4.1) stable; urgency=medium + + * New synapse release 1.4.1. + + -- Synapse Packaging team Fri, 18 Oct 2019 10:13:27 +0100 + matrix-synapse-py3 (1.4.0) stable; urgency=medium * New synapse release 1.4.0. 
diff --git a/synapse/__init__.py b/synapse/__init__.py index 2fc0c3cf85..a22567fcd5 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.4.1rc1" +__version__ = "1.4.1" -- cgit 1.4.1 From f0f6a2b360829e0ba13dec239c586e95d46d60b4 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Fri, 18 Oct 2019 10:56:54 +0100 Subject: use the right function for when we're already in runInteraction --- synapse/storage/end_to_end_keys.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 8ce5dd8bf9..3c82f789fa 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -346,7 +346,8 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): # The "keys" property must only have one entry, which will be the public # key, so we just grab the first value in there pubkey = next(iter(key["keys"].values())) - self._simple_insert( + self._simple_insert_txn( + txn, "devices", values={ "user_id": user_id, @@ -354,12 +355,12 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): "display_name": key_type + " signing key", "hidden": True, }, - desc="store_master_key_device", ) # and finally, store the key itself with self._cross_signing_id_gen.get_next() as stream_id: - self._simple_insert( + self._simple_insert_txn( + txn, "e2e_cross_signing_keys", values={ "user_id": user_id, @@ -367,7 +368,6 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): "keydata": json.dumps(key), "stream_id": stream_id, }, - desc="store_master_key", ) def set_e2e_cross_signing_key(self, user_id, key_type, key): -- cgit 1.4.1 From 560c1222672a241d89e74a1befe2a9f778732fdc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 18 Oct 2019 13:34:33 +0200 Subject: Fix logging config for the docker image (#6197) Turns out that loggers that are instantiated before the config is loaded get turned off. Also bring the logging config that is generated by --generate-config into line. Fixes #6194. --- changelog.d/6197.docker | 1 + docker/conf/log.config | 2 ++ synapse/config/logger.py | 5 ++--- 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 changelog.d/6197.docker (limited to 'synapse') diff --git a/changelog.d/6197.docker b/changelog.d/6197.docker new file mode 100644 index 0000000000..71fb9cbff5 --- /dev/null +++ b/changelog.d/6197.docker @@ -0,0 +1 @@ +Fix logging getting lost for the docker image. diff --git a/docker/conf/log.config b/docker/conf/log.config index db35e475a4..ed418a57cd 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -24,3 +24,5 @@ loggers: root: level: {{ SYNAPSE_LOG_LEVEL or "INFO" }} handlers: [console] + +disable_existing_loggers: false diff --git a/synapse/config/logger.py b/synapse/config/logger.py index d609ec111b..be92e33f93 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -68,9 +68,6 @@ handlers: filters: [context] loggers: - synapse: - level: INFO - synapse.storage.SQL: # beware: increasing this to DEBUG will make synapse log sensitive # information such as access tokens. @@ -79,6 +76,8 @@ loggers: root: level: INFO handlers: [file, console] + +disable_existing_loggers: false """ ) -- cgit 1.4.1
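
To illustrate the behaviour the last commit message describes (a generic Python example, not Synapse code): logging.config.dictConfig() disables every logger that already exists at the time it is called unless disable_existing_loggers is set to False, which is what the added YAML key switches off.

import logging
import logging.config

early = logging.getLogger("synapse.storage.SQL")  # instantiated before config load

logging.config.dictConfig(
    {
        "version": 1,
        "handlers": {"console": {"class": "logging.StreamHandler"}},
        "root": {"level": "INFO", "handlers": ["console"]},
        # Without this key, the logger created above would be silently disabled.
        "disable_existing_loggers": False,
    }
)

early.info("still emitted, because existing loggers were not disabled")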