From d1c7c2a98a133bdae7747601699a6b9ae0f90c8b Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 24 Jul 2019 23:21:52 -0400 Subject: allow devices to be marked as "hidden" This is a prerequisite for cross-signing, as it allows us to create other things that live within the device namespace, so they can be used for signatures. --- synapse/storage/devices.py | 63 ++++++++++++++++++------ synapse/storage/schema/delta/56/signing_keys.sql | 18 +++++++ 2 files changed, 65 insertions(+), 16 deletions(-) create mode 100644 synapse/storage/schema/delta/56/signing_keys.sql diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index d2b113a4e7..b73401bc26 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +22,7 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.errors import StoreError +from synapse.api.errors import Codes, StoreError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore @@ -35,6 +37,7 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( class DeviceWorkerStore(SQLBaseStore): + @defer.inlineCallbacks def get_device(self, user_id, device_id): """Retrieve a device. @@ -46,12 +49,15 @@ class DeviceWorkerStore(SQLBaseStore): Raises: StoreError: if the device is not found """ - return self._simple_select_one( + ret = yield self._simple_select_one( table="devices", keyvalues={"user_id": user_id, "device_id": device_id}, - retcols=("user_id", "device_id", "display_name"), + retcols=("user_id", "device_id", "display_name", "hidden"), desc="get_device", ) + if ret["hidden"]: + raise StoreError(404, "No row found (devices)") + return ret @defer.inlineCallbacks def get_devices_by_user(self, user_id): @@ -67,11 +73,11 @@ class DeviceWorkerStore(SQLBaseStore): devices = yield self._simple_select_list( table="devices", keyvalues={"user_id": user_id}, - retcols=("user_id", "device_id", "display_name"), + retcols=("user_id", "device_id", "display_name", "hidden"), desc="get_devices_by_user", ) - defer.returnValue({d["device_id"]: d for d in devices}) + defer.returnValue({d["device_id"]: d for d in devices if not d["hidden"]}) @defer.inlineCallbacks def get_devices_by_remote(self, destination, from_stream_id, limit): @@ -540,6 +546,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): Returns: defer.Deferred: boolean whether the device was inserted or an existing device existed with that ID. 
+ Raises: + StoreError: if the device is already in use """ key = (user_id, device_id) if self.device_id_exists_cache.get(key, None): @@ -552,12 +560,25 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "user_id": user_id, "device_id": device_id, "display_name": initial_device_display_name, + "hidden": False, }, desc="store_device", or_ignore=True, ) + if not inserted: + # if the device already exists, check if it's a real device, or + # if the device ID is reserved by something else + hidden = yield self._simple_select_one_onecol( + "devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + retcol="hidden", + ) + if hidden: + raise StoreError(400, "The device ID is in use", Codes.FORBIDDEN) self.device_id_exists_cache.prefill(key, True) defer.returnValue(inserted) + except StoreError: + raise except Exception as e: logger.error( "store_device with device_id=%s(%r) user_id=%s(%r)" @@ -582,11 +603,11 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): Returns: defer.Deferred """ - yield self._simple_delete_one( - table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, - desc="delete_device", - ) + sql = """ + DELETE FROM devices + WHERE user_id = ? AND device_id = ? AND NOT COALESCE(hidden, ?) + """ + yield self._execute("delete_device", None, sql, user_id, device_id, False) self.device_id_exists_cache.invalidate((user_id, device_id)) @@ -600,13 +621,21 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): Returns: defer.Deferred """ - yield self._simple_delete_many( - table="devices", - column="device_id", - iterable=device_ids, - keyvalues={"user_id": user_id}, - desc="delete_devices", + + if not device_ids or len(device_ids) == 0: + return + sql = """ + DELETE FROM devices + WHERE user_id = ? AND device_id IN (%s) AND NOT COALESCE(hidden, ?) + """ % ( + ",".join("?" for _ in device_ids) ) + values = [user_id] + values.extend(device_ids) + values.append(False) + + yield self._execute("delete_devices", None, sql, *values) + for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) @@ -628,6 +657,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): updates["display_name"] = new_display_name if not updates: return defer.succeed(None) + # FIXME: should only update if hidden is not True. But updating the + # display name of a hidden device should be harmless return self._simple_update_one( table="devices", keyvalues={"user_id": user_id, "device_id": device_id}, diff --git a/synapse/storage/schema/delta/56/signing_keys.sql b/synapse/storage/schema/delta/56/signing_keys.sql new file mode 100644 index 0000000000..51c96d3116 --- /dev/null +++ b/synapse/storage/schema/delta/56/signing_keys.sql @@ -0,0 +1,18 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- device list needs to know which ones are "real" devices, and which ones are +-- just used to avoid collisions +ALTER TABLE devices ADD COLUMN hidden BOOLEAN NULLABLE; -- cgit 1.4.1 From c659b9f94fff29adfb2abe4f6b345710b65e8741 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 25 Jul 2019 11:08:24 -0400 Subject: allow uploading keys for cross-signing --- synapse/api/errors.py | 1 + synapse/handlers/device.py | 17 ++ synapse/handlers/e2e_keys.py | 198 ++++++++++++++++++++++- synapse/handlers/sync.py | 7 +- synapse/rest/client/v2_alpha/keys.py | 46 +++++- synapse/storage/__init__.py | 5 +- synapse/storage/devices.py | 57 +++++++ synapse/storage/end_to_end_keys.py | 174 +++++++++++++++++++- synapse/storage/schema/delta/56/signing_keys.sql | 41 +++++ synapse/types.py | 24 +++ tests/handlers/test_e2e_keys.py | 63 ++++++++ 11 files changed, 621 insertions(+), 12 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index ad3e262041..be15921bc6 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -61,6 +61,7 @@ class Codes(object): INCOMPATIBLE_ROOM_VERSION = "M_INCOMPATIBLE_ROOM_VERSION" WRONG_ROOM_KEYS_VERSION = "M_WRONG_ROOM_KEYS_VERSION" EXPIRED_ACCOUNT = "ORG_MATRIX_EXPIRED_ACCOUNT" + INVALID_SIGNATURE = "M_INVALID_SIGNATURE" class CodeMessageException(RuntimeError): diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 99e8413092..2a8fa9c818 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -408,6 +410,21 @@ class DeviceHandler(DeviceWorkerHandler): for host in hosts: self.federation_sender.send_device_messages(host) + @defer.inlineCallbacks + def notify_user_signature_update(self, from_user_id, user_ids): + """Notify a user that they have made new signatures of other users. + + Args: + from_user_id (str): the user who made the signature + user_ids (list[str]): the users IDs that have new signatures + """ + + position = yield self.store.add_user_signature_change_to_streams( + from_user_id, user_ids + ) + + self.notifier.on_new_event("device_list_key", position, users=[from_user_id]) + @defer.inlineCallbacks def on_federation_query_user_devices(self, user_id): stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index fdfe8611b6..6187f879ef 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018-2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
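As orientation for the handler changes that follow, this is roughly the shape of body that upload_signing_keys_for_user accepts, pieced together from the key-format comment in _set_e2e_cross_signing_key_txn and the checks in _check_cross_signing_key further down. The user ID and all base64-looking values are placeholders, not real key material:

    # A sketch of an upload body; every base64-looking string is a placeholder.
    upload_body = {
        "master_key": {
            "user_id": "@alice:example.com",
            "usage": ["master"],
            # "keys" must contain exactly one "<algorithm>:<key ID>" entry
            "keys": {"ed25519:base64+master+public+key": "base64+master+public+key"},
        },
        "self_signing_key": {
            "user_id": "@alice:example.com",
            "usage": ["self_signing"],
            "keys": {
                "ed25519:base64+self+signing+public+key": "base64+self+signing+public+key"
            },
            # subkeys need a valid signature from the master key, otherwise
            # the upload is rejected with M_INVALID_SIGNATURE
            "signatures": {
                "@alice:example.com": {
                    "ed25519:base64+master+public+key": "base64+signature"
                }
            },
        },
    }

The master key may be omitted if one was uploaded previously; in that case the stored master key is loaded and used to check the signatures on the other keys.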
@@ -19,12 +20,17 @@ import logging from six import iteritems from canonicaljson import encode_canonical_json, json +from signedjson.sign import SignatureVerifyException, verify_signed_json from twisted.internet import defer -from synapse.api.errors import CodeMessageException, SynapseError +from synapse.api.errors import CodeMessageException, Codes, SynapseError from synapse.logging.context import make_deferred_yieldable, run_in_background -from synapse.types import UserID, get_domain_from_id +from synapse.types import ( + UserID, + get_domain_from_id, + get_verify_key_from_cross_signing_key, +) from synapse.util.retryutils import NotRetryingDestination logger = logging.getLogger(__name__) @@ -46,7 +52,7 @@ class E2eKeysHandler(object): ) @defer.inlineCallbacks - def query_devices(self, query_body, timeout): + def query_devices(self, query_body, timeout, from_user_id): """ Handle a device key query from a client { @@ -64,6 +70,11 @@ class E2eKeysHandler(object): } } } + + Args: + from_user_id (str): the user making the query. This is used when + adding cross-signing signatures to limit what signatures users + can see. """ device_keys_query = query_body.get("device_keys", {}) @@ -118,6 +129,11 @@ class E2eKeysHandler(object): r = remote_queries_not_in_cache.setdefault(domain, {}) r[user_id] = remote_queries[user_id] + # Get cached cross-signing keys + cross_signing_keys = yield self.query_cross_signing_keys( + device_keys_query, from_user_id + ) + # Now fetch any devices that we don't have in our cache @defer.inlineCallbacks def do_remote_query(destination): @@ -131,6 +147,14 @@ class E2eKeysHandler(object): if user_id in destination_query: results[user_id] = keys + for user_id, key in remote_result["master_keys"].items(): + if user_id in destination_query: + cross_signing_keys["master"][user_id] = key + + for user_id, key in remote_result["self_signing_keys"].items(): + if user_id in destination_query: + cross_signing_keys["self_signing"][user_id] = key + except Exception as e: failures[destination] = _exception_to_failure(e) @@ -144,7 +168,73 @@ class E2eKeysHandler(object): ) ) - defer.returnValue({"device_keys": results, "failures": failures}) + ret = {"device_keys": results, "failures": failures} + + for key, value in iteritems(cross_signing_keys): + ret[key + "_keys"] = value + + defer.returnValue(ret) + + @defer.inlineCallbacks + def query_cross_signing_keys(self, query, from_user_id): + """Get cross-signing keys for users + + Args: + query (Iterable[string]) an iterable of user IDs. A dict whose keys + are user IDs satisfies this, so the query format used for + query_devices can be used here. + from_user_id (str): the user making the query. This is used when + adding cross-signing signatures to limit what signatures users + can see. + + Returns: + defer.Deferred[dict[str, dict[str, dict]]]: map from + (master|self_signing|user_signing) -> user_id -> key + """ + master_keys = {} + self_signing_keys = {} + user_signing_keys = {} + + for user_id in query: + # XXX: consider changing the store functions to allow querying + # multiple users simultaneously. 
+ try: + key = yield self.store.get_e2e_cross_signing_key( + user_id, "master", from_user_id + ) + if key: + master_keys[user_id] = key + except Exception as e: + logger.info("Error getting master key: %s", e) + + try: + key = yield self.store.get_e2e_cross_signing_key( + user_id, "self_signing", from_user_id + ) + if key: + self_signing_keys[user_id] = key + except Exception as e: + logger.info("Error getting self-signing key: %s", e) + + # users can see other users' master and self-signing keys, but can + # only see their own user-signing keys + if from_user_id == user_id: + try: + key = yield self.store.get_e2e_cross_signing_key( + user_id, "user_signing", from_user_id + ) + if key: + user_signing_keys[user_id] = key + except Exception as e: + logger.info("Error getting user-signing key: %s", e) + + defer.returnValue( + { + "master": master_keys, + "self_signing": self_signing_keys, + "user_signing": user_signing_keys, + } + ) @defer.inlineCallbacks def query_local_devices(self, query): @@ -342,6 +432,104 @@ class E2eKeysHandler(object): yield self.store.add_e2e_one_time_keys(user_id, device_id, time_now, new_keys) + @defer.inlineCallbacks + def upload_signing_keys_for_user(self, user_id, keys): + """Upload signing keys for cross-signing + + Args: + user_id (string): the user uploading the keys + keys (dict[string, dict]): the signing keys + """ + + # if a master key is uploaded, then check it. Otherwise, load the + # stored master key, to check signatures on other keys + if "master_key" in keys: + master_key = keys["master_key"] + + _check_cross_signing_key(master_key, user_id, "master") + else: + master_key = yield self.store.get_e2e_cross_signing_key(user_id, "master") + + # if there is no master key, then we can't do anything, because all the + # other cross-signing keys need to be signed by the master key + if not master_key: + raise SynapseError(400, "No master key available", Codes.MISSING_PARAM) + + master_key_id, master_verify_key = get_verify_key_from_cross_signing_key( + master_key + ) + + # for the other cross-signing keys, make sure that they have valid + # signatures from the master key + if "self_signing_key" in keys: + self_signing_key = keys["self_signing_key"] + + _check_cross_signing_key( + self_signing_key, user_id, "self_signing", master_verify_key + ) + + if "user_signing_key" in keys: + user_signing_key = keys["user_signing_key"] + + _check_cross_signing_key( + user_signing_key, user_id, "user_signing", master_verify_key + ) + + # if everything checks out, then store the keys and send notifications + deviceids = [] + if "master_key" in keys: + yield self.store.set_e2e_cross_signing_key(user_id, "master", master_key) + deviceids.append(master_verify_key.version) + if "self_signing_key" in keys: + yield self.store.set_e2e_cross_signing_key( + user_id, "self_signing", self_signing_key + ) + deviceids.append( + get_verify_key_from_cross_signing_key(self_signing_key)[1].version + ) + if "user_signing_key" in keys: + yield self.store.set_e2e_cross_signing_key( + user_id, "user_signing", user_signing_key + ) + # the signature stream matches the semantics that we want for + # user-signing key updates: only the user themselves is notified of + # their own user-signing key updates + yield self.device_handler.notify_user_signature_update(user_id, [user_id]) + + # master key and self-signing key updates match the semantics of device + # list updates: all users who share an encrypted room are notified + if len(deviceids): + yield 
self.device_handler.notify_device_update(user_id, deviceids) + + defer.returnValue({}) + + +def _check_cross_signing_key(key, user_id, key_type, signing_key=None): + """Check a cross-signing key uploaded by a user. Performs some basic sanity + checking, and ensures that it is signed, if a signature is required. + + Args: + key (dict): the key data to verify + user_id (str): the user whose key is being checked + key_type (str): the type of key that the key should be + signing_key (VerifyKey): (optional) the signing key that the key should + be signed with. If omitted, signatures will not be checked. + """ + if ( + key.get("user_id") != user_id + or key_type not in key.get("usage", []) + or len(key.get("keys", {})) != 1 + ): + raise SynapseError(400, ("Invalid %s key" % (key_type,)), Codes.INVALID_PARAM) + + if signing_key: + try: + verify_signed_json(key, user_id, signing_key) + except SignatureVerifyException: + raise SynapseError( + 400, ("Invalid signature on %s key" % key_type), Codes.INVALID_SIGNATURE + ) + def _exception_to_failure(e): if isinstance(e, CodeMessageException): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index cd1ac0a27a..c1c28a5fa1 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018, 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -1116,6 +1116,11 @@ class SyncHandler(object): # weren't in the previous sync *or* they left and rejoined. users_that_have_changed.update(newly_joined_or_invited_users) + user_signatures_changed = yield self.store.get_users_whose_signatures_changed( + user_id, since_token.device_list_key + ) + users_that_have_changed.update(user_signatures_changed) + # Now find users that we no longer track for room_id in newly_left_rooms: left_users = yield self.state.get_current_users_in_room(room_id) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 45c9928b65..3eaf1fd8a4 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
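To make the signature check in the _check_cross_signing_key helper above concrete, here is a minimal self-contained sketch of the signedjson round trip it relies on; the user ID is a placeholder and the keys are throwaway ones generated on the fly:

    from signedjson.key import generate_signing_key, get_verify_key
    from signedjson.sign import (
        SignatureVerifyException,
        sign_json,
        verify_signed_json,
    )

    user_id = "@alice:example.com"  # placeholder

    # Throwaway master key pair; "1" is an arbitrary version string.
    master_signing_key = generate_signing_key("1")
    master_verify_key = get_verify_key(master_signing_key)

    subkey = {
        "user_id": user_id,
        "usage": ["self_signing"],
        "keys": {"ed25519:placeholder": "placeholder+public+key"},
    }

    # A client signs the subkey with its master key before uploading it...
    signed_subkey = sign_json(subkey, user_id, master_signing_key)

    # ...and the server verifies that signature on the way in. A bad or
    # missing signature raises SignatureVerifyException, which the handler
    # turns into a 400 with Codes.INVALID_SIGNATURE.
    verify_signed_json(signed_subkey, user_id, master_verify_key)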
@@ -26,7 +27,7 @@ from synapse.http.servlet import ( ) from synapse.types import StreamToken -from ._base import client_patterns +from ._base import client_patterns, interactive_auth_handler logger = logging.getLogger(__name__) @@ -145,10 +146,11 @@ class KeyQueryServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): - yield self.auth.get_user_by_req(request, allow_guest=True) + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + user_id = requester.user.to_string() timeout = parse_integer(request, "timeout", 10 * 1000) body = parse_json_object_from_request(request) - result = yield self.e2e_keys_handler.query_devices(body, timeout) + result = yield self.e2e_keys_handler.query_devices(body, timeout, user_id) defer.returnValue((200, result)) @@ -227,8 +229,46 @@ class OneTimeKeyServlet(RestServlet): defer.returnValue((200, result)) +class SigningKeyUploadServlet(RestServlet): + """ + POST /keys/device_signing/upload HTTP/1.1 + Content-Type: application/json + + { + } + """ + + PATTERNS = client_patterns("/keys/device_signing/upload$", releases=()) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(SigningKeyUploadServlet, self).__init__() + self.hs = hs + self.auth = hs.get_auth() + self.e2e_keys_handler = hs.get_e2e_keys_handler() + self.auth_handler = hs.get_auth_handler() + + @interactive_auth_handler + @defer.inlineCallbacks + def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() + body = parse_json_object_from_request(request) + + yield self.auth_handler.validate_user_via_ui_auth( + requester, body, self.hs.get_ip_from_request(request) + ) + + result = yield self.e2e_keys_handler.upload_signing_keys_for_user(user_id, body) + defer.returnValue((200, result)) + + def register_servlets(hs, http_server): KeyUploadServlet(hs).register(http_server) KeyQueryServlet(hs).register(http_server) KeyChangesServlet(hs).register(http_server) OneTimeKeyServlet(hs).register(http_server) + SigningKeyUploadServlet(hs).register(http_server) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 6b0ca80087..c20ba1001c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018,2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
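For a sense of the wire format, a hedged client-side sketch of calling the new SigningKeyUploadServlet with the requests library; the homeserver URL and access token are placeholders, and since the servlet is registered with releases=(), the assumption here is that the route is exposed only under the unstable client prefix. A real call may first be answered with a 401 carrying UI-auth flows that the client has to complete:

    import requests

    homeserver = "https://matrix.example.com"  # placeholder
    access_token = "MDAx...placeholder"        # placeholder

    resp = requests.post(
        homeserver + "/_matrix/client/unstable/keys/device_signing/upload",
        headers={"Authorization": "Bearer " + access_token},
        json={
            "master_key": {
                "user_id": "@alice:example.com",
                "usage": ["master"],
                "keys": {"ed25519:base64+public+key": "base64+public+key"},
            }
        },
    )
    # 200 with an empty JSON object on success; 401 with UI-auth flows if
    # validate_user_via_ui_auth decides re-authentication is needed.
    print(resp.status_code, resp.json())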
@@ -207,6 +207,9 @@ class DataStore( self._device_list_stream_cache = StreamChangeCache( "DeviceListStreamChangeCache", device_list_max ) + self._user_signature_stream_cache = StreamChangeCache( + "UserSignatureStreamChangeCache", device_list_max + ) self._device_list_federation_stream_cache = StreamChangeCache( "DeviceListFederationStreamChangeCache", device_list_max ) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index b73401bc26..ed372e2fc4 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -302,6 +302,41 @@ class DeviceWorkerStore(SQLBaseStore): """ txn.execute(sql, (destination, stream_id)) + @defer.inlineCallbacks + def add_user_signature_change_to_streams(self, from_user_id, user_ids): + """Persist that a user has made new signatures + + Args: + from_user_id (str): the user who made the signatures + user_ids (list[str]): the users who were signed + """ + + with self._device_list_id_gen.get_next() as stream_id: + yield self.runInteraction( + "add_user_sig_change_to_streams", + self._add_user_signature_change_txn, + from_user_id, + user_ids, + stream_id, + ) + defer.returnValue(stream_id) + + def _add_user_signature_change_txn(self, txn, from_user_id, user_ids, stream_id): + txn.call_after( + self._user_signature_stream_cache.entity_has_changed, + from_user_id, + stream_id, + ) + self._simple_insert_txn( + txn, + "user_signature_stream", + values={ + "stream_id": stream_id, + "from_user_id": from_user_id, + "user_ids": json.dumps(user_ids), + }, + ) + def get_device_stream_token(self): return self._device_list_id_gen.get_current_token() @@ -440,6 +475,28 @@ class DeviceWorkerStore(SQLBaseStore): "get_users_whose_devices_changed", _get_users_whose_devices_changed_txn ) + @defer.inlineCallbacks + def get_users_whose_signatures_changed(self, user_id, from_key): + """Get the users who have new cross-signing signatures made by `user_id` since + `from_key`. + + Args: + user_id (str): the user who made the signatures + from_key (str): The device lists stream token + """ + from_key = int(from_key) + if self._user_signature_stream_cache.has_entity_changed(user_id, from_key): + sql = """ + SELECT DISTINCT user_ids FROM user_signature_stream + WHERE from_user_id = ? AND stream_id > ? + """ + rows = yield self._execute( + "get_users_whose_signatures_changed", None, sql, user_id, from_key + ) + defer.returnValue(set(user for row in rows for user in json.loads(row[0]))) + else: + defer.returnValue(set()) + def get_all_device_list_changes_for_remotes(self, from_key, to_key): """Return a list of `(stream_id, user_id, destination)` which is the combined list of changes to devices, and which destinations need to be diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 2fabb9e2cb..bb5f7d94eb 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import time + from six import iteritems -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json from twisted.internet import defer @@ -85,11 +89,12 @@ class EndToEndKeyWorkerStore(SQLBaseStore): " k.key_json" " FROM devices d" " %s JOIN e2e_device_keys_json k USING (user_id, device_id)" - " WHERE %s" + " WHERE (%s) AND NOT COALESCE(d.hidden, ?)" ) % ( "LEFT" if include_all_devices else "INNER", " OR ".join("(" + q + ")" for q in query_clauses), ) + query_params.append(False) txn.execute(sql, query_params) rows = self.cursor_to_dict(txn) @@ -281,3 +286,168 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): return self.runInteraction( "delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn ) + + def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key): + """Set a user's cross-signing key. + + Args: + txn (twisted.enterprise.adbapi.Connection): db connection + user_id (str): the user to set the signing key for + key_type (str): the type of key that is being set: either 'master' + for a master key, 'self_signing' for a self-signing key, or + 'user_signing' for a user-signing key + key (dict): the key data + """ + # the cross-signing keys need to occupy the same namespace as devices, + # since signatures are identified by device ID. So add an entry to the + # device table to make sure that we don't have a collision with device + # IDs + + # the 'key' dict will look something like: + # { + # "user_id": "@alice:example.com", + # "usage": ["self_signing"], + # "keys": { + # "ed25519:base64+self+signing+public+key": "base64+self+signing+public+key", + # }, + # "signatures": { + # "@alice:example.com": { + # "ed25519:base64+master+public+key": "base64+signature" + # } + # } + # } + # The "keys" property must only have one entry, which will be the public + # key, so we just grab the first value in there + pubkey = next(iter(key["keys"].values())) + self._simple_insert( + "devices", + values={ + "user_id": user_id, + "device_id": pubkey, + "display_name": key_type + " signing key", + "hidden": True, + }, + desc="store_master_key_device", + ) + + # and finally, store the key itself + self._simple_insert( + "e2e_cross_signing_keys", + values={ + "user_id": user_id, + "keytype": key_type, + "keydata": json.dumps(key), + "added_ts": time.time() * 1000, + }, + desc="store_master_key", + ) + + def set_e2e_cross_signing_key(self, user_id, key_type, key): + """Set a user's cross-signing key. + + Args: + user_id (str): the user to set the user-signing key for + key_type (str): the type of cross-signing key to set + key (dict): the key data + """ + return self.runInteraction( + "add_e2e_cross_signing_key", + self._set_e2e_cross_signing_key_txn, + user_id, + key_type, + key, + ) + + def _get_e2e_cross_signing_key_txn(self, txn, user_id, key_type, from_user_id=None): + """Returns a user's cross-signing key. + + Args: + txn (twisted.enterprise.adbapi.Connection): db connection + user_id (str): the user whose key is being requested + key_type (str): the type of key that is being set: either 'master' + for a master key, 'self_signing' for a self-signing key, or + 'user_signing' for a user-signing key + from_user_id (str): if specified, signatures made by this user on + the key will be included in the result + + Returns: + dict of the key data + """ + sql = ( + "SELECT keydata " + " FROM e2e_cross_signing_keys " + " WHERE user_id = ? AND keytype = ? 
ORDER BY added_ts DESC LIMIT 1" + ) + txn.execute(sql, (user_id, key_type)) + row = txn.fetchone() + if not row: + return None + key = json.loads(row[0]) + + device_id = None + for k in key["keys"].values(): + device_id = k + + if from_user_id is not None: + sql = ( + "SELECT key_id, signature " + " FROM e2e_cross_signing_signatures " + " WHERE user_id = ? " + " AND target_user_id = ? " + " AND target_device_id = ? " + ) + txn.execute(sql, (from_user_id, user_id, device_id)) + row = txn.fetchone() + if row: + key.setdefault("signatures", {}).setdefault(from_user_id, {})[ + row[0] + ] = row[1] + + return key + + def get_e2e_cross_signing_key(self, user_id, key_type, from_user_id=None): + """Returns a user's cross-signing key. + + Args: + user_id (str): the user whose self-signing key is being requested + key_type (str): the type of cross-signing key to get + from_user_id (str): if specified, signatures made by this user on + the self-signing key will be included in the result + + Returns: + dict of the key data + """ + return self.runInteraction( + "get_e2e_cross_signing_key", + self._get_e2e_cross_signing_key_txn, + user_id, + key_type, + from_user_id, + ) + + def store_e2e_cross_signing_signatures(self, user_id, signatures): + """Stores cross-signing signatures. + + Args: + user_id (str): the user who made the signatures + signatures (iterable[(str, str, str, str)]): signatures to add - each + a tuple of (key_id, target_user_id, target_device_id, signature), + where key_id is the ID of the key (including the signature + algorithm) that made the signature, target_user_id and + target_device_id indicate the device being signed, and signature + is the signature of the device + """ + return self._simple_insert_many( + "e2e_cross_signing_signatures", + [ + { + "user_id": user_id, + "key_id": key_id, + "target_user_id": target_user_id, + "target_device_id": target_device_id, + "signature": signature, + } + for (key_id, target_user_id, target_device_id, signature) in signatures + ], + "add_e2e_signing_key", + ) diff --git a/synapse/storage/schema/delta/56/signing_keys.sql b/synapse/storage/schema/delta/56/signing_keys.sql index 51c96d3116..771740e970 100644 --- a/synapse/storage/schema/delta/56/signing_keys.sql +++ b/synapse/storage/schema/delta/56/signing_keys.sql @@ -13,6 +13,47 @@ * limitations under the License. 
*/ +-- cross-signing keys +CREATE TABLE IF NOT EXISTS e2e_cross_signing_keys ( + user_id TEXT NOT NULL, + -- the type of cross-signing key (master, user_signing, or self_signing) + keytype TEXT NOT NULL, + -- the full key information, as a json-encoded dict + keydata TEXT NOT NULL, + -- time that the key was added + added_ts BIGINT NOT NULL +); + +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, added_ts); + +-- cross-signing signatures +CREATE TABLE IF NOT EXISTS e2e_cross_signing_signatures ( + -- user who did the signing + user_id TEXT NOT NULL, + -- key used to sign + key_id TEXT NOT NULL, + -- user who was signed + target_user_id TEXT NOT NULL, + -- device/key that was signed + target_device_id TEXT NOT NULL, + -- the actual signature + signature TEXT NOT NULL +); + +CREATE UNIQUE INDEX e2e_cross_signing_signatures_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id); + +-- stream of user signature updates +CREATE TABLE IF NOT EXISTS user_signature_stream ( + -- uses the same stream ID as device list stream + stream_id BIGINT NOT NULL, + -- user who did the signing + from_user_id TEXT NOT NULL, + -- list of users who were signed, as a JSON array + user_ids TEXT NOT NULL +); + +CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id); + -- device list needs to know which ones are "real" devices, and which ones are -- just used to avoid collisions ALTER TABLE devices ADD COLUMN hidden BOOLEAN NULLABLE; diff --git a/synapse/types.py b/synapse/types.py index 51eadb6ad4..7a80471a0c 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,6 +18,8 @@ import string from collections import namedtuple import attr +from signedjson.key import decode_verify_key_bytes +from unpaddedbase64 import decode_base64 from synapse.api.errors import SynapseError @@ -475,3 +478,24 @@ class ReadReceipt(object): user_id = attr.ib() event_ids = attr.ib() data = attr.ib() + + +def get_verify_key_from_cross_signing_key(key_info): + """Get the key ID and signedjson verify key from a cross-signing key dict + + Args: + key_info (dict): a cross-signing key dict, which must have a "keys" + property that has exactly one item in it + + Returns: + (str, VerifyKey): the key ID and verify key for the cross-signing key + """ + # make sure that exactly one key is provided + if "keys" not in key_info: + raise SynapseError(400, "Invalid key") + keys = key_info["keys"] + if len(keys) != 1: + raise SynapseError(400, "Invalid key") + # and return that one key + for key_id, key_data in keys.items(): + return (key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 8dccc6826e..9ae4cb6ea2 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
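To see why the upload handler can use verify_key.version as a device ID, here is a short sketch of the get_verify_key_from_cross_signing_key helper above; the user ID is a placeholder, and the key is generated on the fly so the snippet stays self-contained:

    from signedjson.key import (
        encode_verify_key_base64,
        generate_signing_key,
        get_verify_key,
    )

    from synapse.types import get_verify_key_from_cross_signing_key

    pubkey = encode_verify_key_base64(get_verify_key(generate_signing_key("1")))
    key_info = {
        "user_id": "@alice:example.com",  # placeholder
        "usage": ["master"],
        "keys": {"ed25519:" + pubkey: pubkey},
    }

    key_id, verify_key = get_verify_key_from_cross_signing_key(key_info)
    assert key_id == "ed25519:" + pubkey
    # decode_verify_key_bytes takes the version from the part of the key ID
    # after the colon, so .version is the public key itself -- exactly the
    # string that _set_e2e_cross_signing_key_txn reserves as a hidden device.
    assert verify_key.version == pubkey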
@@ -145,3 +147,64 @@ class E2eKeysHandlerTestCase(unittest.TestCase): "one_time_keys": {local_user: {device_id: {"alg1:k1": "key1"}}}, }, ) + + @defer.inlineCallbacks + def test_replace_master_key(self): + """uploading a new signing key should make the old signing key unavailable""" + local_user = "@boris:" + self.hs.hostname + keys1 = { + "master_key": { + # private key: 2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0 + "user_id": local_user, + "usage": ["master"], + "keys": { + "ed25519:nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk": "nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk" + }, + } + } + yield self.handler.upload_signing_keys_for_user(local_user, keys1) + + keys2 = { + "master_key": { + # private key: 4TL4AjRYwDVwD3pqQzcor+ez/euOB1/q78aTJ+czDNs + "user_id": local_user, + "usage": ["master"], + "keys": { + "ed25519:Hq6gL+utB4ET+UvD5ci0kgAwsX6qP/zvf8v6OInU5iw": "Hq6gL+utB4ET+UvD5ci0kgAwsX6qP/zvf8v6OInU5iw" + }, + } + } + yield self.handler.upload_signing_keys_for_user(local_user, keys2) + + devices = yield self.handler.query_devices({"device_keys": {local_user: []}}, 0, local_user) + self.assertDictEqual(devices["master_keys"], {local_user: keys2["master_key"]}) + + @defer.inlineCallbacks + def test_self_signing_key_doesnt_show_up_as_device(self): + """signing keys should be hidden when fetching a user's devices""" + local_user = "@boris:" + self.hs.hostname + keys1 = { + "master_key": { + # private key: 2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0 + "user_id": local_user, + "usage": ["master"], + "keys": { + "ed25519:nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk": "nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk" + }, + } + } + yield self.handler.upload_signing_keys_for_user(local_user, keys1) + + res = None + try: + yield self.hs.get_device_handler().check_device_registered( + user_id=local_user, + device_id="nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk", + initial_device_display_name="new display name", + ) + except errors.SynapseError as e: + res = e.code + self.assertEqual(res, 400) + + res = yield self.handler.query_local_devices({local_user: None}) + self.assertDictEqual(res, {local_user: {}}) -- cgit 1.4.1 From 781ade836b05b7e327baa7f927c553941edcc368 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Tue, 30 Jul 2019 23:09:50 -0400 Subject: apply changes from PR review --- synapse/storage/devices.py | 33 ++++++++++------------ synapse/storage/end_to_end_keys.py | 2 +- synapse/storage/schema/delta/56/hidden_devices.sql | 18 ++++++++++++ synapse/storage/schema/delta/56/signing_keys.sql | 18 ------------ 4 files changed, 34 insertions(+), 37 deletions(-) create mode 100644 synapse/storage/schema/delta/56/hidden_devices.sql delete mode 100644 synapse/storage/schema/delta/56/signing_keys.sql diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index b73401bc26..f62ed12386 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -37,9 +37,9 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( class DeviceWorkerStore(SQLBaseStore): - @defer.inlineCallbacks def get_device(self, user_id, device_id): - """Retrieve a device. + """Retrieve a device. Only returns devices that are not marked as + hidden. 
Args: user_id (str): The ID of the user which owns the device @@ -49,19 +49,17 @@ class DeviceWorkerStore(SQLBaseStore): Raises: StoreError: if the device is not found """ - ret = yield self._simple_select_one( + return self._simple_select_one( table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, + keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, retcols=("user_id", "device_id", "display_name", "hidden"), desc="get_device", ) - if ret["hidden"]: - raise StoreError(404, "No row found (devices)") - return ret @defer.inlineCallbacks def get_devices_by_user(self, user_id): - """Retrieve all of a user's registered devices. + """Retrieve all of a user's registered devices. Only returns devices + that are not marked as hidden. Args: user_id (str): @@ -72,12 +70,12 @@ class DeviceWorkerStore(SQLBaseStore): """ devices = yield self._simple_select_list( table="devices", - keyvalues={"user_id": user_id}, - retcols=("user_id", "device_id", "display_name", "hidden"), + keyvalues={"user_id": user_id, "hidden": False}, + retcols=("user_id", "device_id", "display_name"), desc="get_devices_by_user", ) - defer.returnValue({d["device_id"]: d for d in devices if not d["hidden"]}) + defer.returnValue({d["device_id"]: d for d in devices}) @defer.inlineCallbacks def get_devices_by_remote(self, destination, from_stream_id, limit): @@ -605,9 +603,9 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): """ sql = """ DELETE FROM devices - WHERE user_id = ? AND device_id = ? AND NOT COALESCE(hidden, ?) + WHERE user_id = ? AND device_id = ? AND NOT hidden """ - yield self._execute("delete_device", None, sql, user_id, device_id, False) + yield self._execute("delete_device", None, sql, user_id, device_id) self.device_id_exists_cache.invalidate((user_id, device_id)) @@ -626,7 +624,7 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): return sql = """ DELETE FROM devices - WHERE user_id = ? AND device_id IN (%s) AND NOT COALESCE(hidden, ?) + WHERE user_id = ? AND device_id IN (%s) AND NOT hidden """ % ( ",".join("?" for _ in device_ids) ) @@ -640,7 +638,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): self.device_id_exists_cache.invalidate((user_id, device_id)) def update_device(self, user_id, device_id, new_display_name=None): - """Update a device. + """Update a device. Only updates the device if it is not marked as + hidden. Args: user_id (str): The ID of the user which owns the device @@ -657,11 +656,9 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): updates["display_name"] = new_display_name if not updates: return defer.succeed(None) - # FIXME: should only update if hidden is not True. 
But updating the - # display name of a hidden device should be harmless return self._simple_update_one( table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, + keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, updatevalues=updates, desc="update_device", ) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 2fabb9e2cb..66eb509588 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -85,7 +85,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): " k.key_json" " FROM devices d" " %s JOIN e2e_device_keys_json k USING (user_id, device_id)" - " WHERE %s" + " WHERE %s AND NOT d.hidden" ) % ( "LEFT" if include_all_devices else "INNER", " OR ".join("(" + q + ")" for q in query_clauses), diff --git a/synapse/storage/schema/delta/56/hidden_devices.sql b/synapse/storage/schema/delta/56/hidden_devices.sql new file mode 100644 index 0000000000..67f8b20297 --- /dev/null +++ b/synapse/storage/schema/delta/56/hidden_devices.sql @@ -0,0 +1,18 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- device list needs to know which ones are "real" devices, and which ones are +-- just used to avoid collisions +ALTER TABLE devices ADD COLUMN hidden BOOLEAN DEFAULT FALSE; diff --git a/synapse/storage/schema/delta/56/signing_keys.sql b/synapse/storage/schema/delta/56/signing_keys.sql deleted file mode 100644 index 51c96d3116..0000000000 --- a/synapse/storage/schema/delta/56/signing_keys.sql +++ /dev/null @@ -1,18 +0,0 @@ -/* Copyright 2019 New Vector Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- device list needs to know which ones are "real" devices, and which ones are --- just used to avoid collisions -ALTER TABLE devices ADD COLUMN hidden BOOLEAN NULLABLE; -- cgit 1.4.1 From 2997a91250c4af915be70d2be38df1b3889c4c2d Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Tue, 30 Jul 2019 23:14:00 -0400 Subject: add changelog file --- changelog.d/5759.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5759.misc diff --git a/changelog.d/5759.misc b/changelog.d/5759.misc new file mode 100644 index 0000000000..c0bc566c4c --- /dev/null +++ b/changelog.d/5759.misc @@ -0,0 +1 @@ +Allow devices to be marked as hidden, for use by features such as cross-signing. 
\ No newline at end of file -- cgit 1.4.1 From 185188be03e278a5a9a24f7e206e7fc2410415a7 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 31 Jul 2019 15:18:15 -0400 Subject: remove extra SQL query param --- synapse/storage/devices.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index e7800da4f7..b3e8c7396d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -630,7 +630,6 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): ) values = [user_id] values.extend(device_ids) - values.append(False) yield self._execute("delete_devices", None, sql, *values) -- cgit 1.4.1 From 430ea08186750ef67899bc302c0b6bb32c2f111c Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 31 Jul 2019 15:38:11 -0400 Subject: PostgreSQL, Y U no like? --- synapse/storage/devices.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index b3e8c7396d..a1f12df907 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -603,9 +603,9 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): """ sql = """ DELETE FROM devices - WHERE user_id = ? AND device_id = ? AND NOT hidden + WHERE user_id = ? AND device_id = ? AND hidden = ? """ - yield self._execute("delete_device", None, sql, user_id, device_id) + yield self._execute("delete_device", None, sql, user_id, device_id, False) self.device_id_exists_cache.invalidate((user_id, device_id)) @@ -624,12 +624,13 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): return sql = """ DELETE FROM devices - WHERE user_id = ? AND device_id IN (%s) AND NOT hidden + WHERE user_id = ? AND device_id IN (%s) AND hidden = ? """ % ( ",".join("?" for _ in device_ids) ) values = [user_id] values.extend(device_ids) + values.append(False) yield self._execute("delete_devices", None, sql, *values) -- cgit 1.4.1 From 73b26f827ccb96a10629ecb0737bd3db4915bb14 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 31 Jul 2019 18:37:05 -0400 Subject: really fix queries to work with Postgres (by going back to not using SQL directly) --- synapse/storage/devices.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index a1f12df907..9f2bb40834 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -601,11 +601,11 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): Returns: defer.Deferred """ - sql = """ - DELETE FROM devices - WHERE user_id = ? AND device_id = ? AND hidden = ? - """ - yield self._execute("delete_device", None, sql, user_id, device_id, False) + yield self._simple_delete_one( + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, + desc="delete_device", + ) self.device_id_exists_cache.invalidate((user_id, device_id)) @@ -619,21 +619,13 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): Returns: defer.Deferred """ - - if not device_ids or len(device_ids) == 0: - return - sql = """ - DELETE FROM devices - WHERE user_id = ? AND device_id IN (%s) AND hidden = ? - """ % ( - ",".join("?" 
for _ in device_ids) + yield self._simple_delete_many( + table="devices", + column="device_id", + iterable=device_ids, + keyvalues={"user_id": user_id, "hidden": False}, + desc="delete_devices", ) - values = [user_id] - values.extend(device_ids) - values.append(False) - - yield self._execute("delete_devices", None, sql, *values) - for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) -- cgit 1.4.1 From d78a4fe156e574ad1f28cf3b12ed0bb11bc077f4 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 1 Aug 2019 02:16:09 -0400 Subject: don't need to return the hidden column any more --- synapse/storage/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 9f2bb40834..991e28ea24 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -52,7 +52,7 @@ class DeviceWorkerStore(SQLBaseStore): return self._simple_select_one( table="devices", keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, - retcols=("user_id", "device_id", "display_name", "hidden"), + retcols=("user_id", "device_id", "display_name"), desc="get_device", ) -- cgit 1.4.1 From fac1cdc5626ab2d59861a6aead8a44e7638934ba Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 1 Aug 2019 21:51:19 -0400 Subject: make changes from PR review --- synapse/handlers/e2e_keys.py | 24 +++++++--- synapse/storage/schema/delta/56/hidden_devices.sql | 41 ---------------- synapse/storage/schema/delta/56/signing_keys.sql | 55 ++++++++++++++++++++++ synapse/types.py | 4 +- 4 files changed, 75 insertions(+), 49 deletions(-) create mode 100644 synapse/storage/schema/delta/56/signing_keys.sql diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 39f4ec8e60..9081c3f64c 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -510,9 +510,18 @@ class E2eKeysHandler(object): if not master_key: raise SynapseError(400, "No master key available", Codes.MISSING_PARAM) - master_key_id, master_verify_key = get_verify_key_from_cross_signing_key( - master_key - ) + try: + master_key_id, master_verify_key = get_verify_key_from_cross_signing_key( + master_key + ) + except ValueError: + if "master_key" in keys: + # the invalid key came from the request + raise SynapseError(400, "Invalid master key", Codes.INVALID_PARAM) + else: + # the invalid key came from the database + logger.error("Invalid master key found for user %s", user_id) + raise SynapseError(500, "Invalid master key") # for the other cross-signing keys, make sure that they have valid # signatures from the master key @@ -539,9 +548,12 @@ class E2eKeysHandler(object): yield self.store.set_e2e_cross_signing_key( user_id, "self_signing", self_signing_key ) - deviceids.append( - get_verify_key_from_cross_signing_key(self_signing_key)[1].version - ) + try: + deviceids.append( + get_verify_key_from_cross_signing_key(self_signing_key)[1].version + ) + except ValueError: + raise SynapseError(400, "Invalid self-signing key", Codes.INVALID_PARAM) if "user_signing_key" in keys: yield self.store.set_e2e_cross_signing_key( user_id, "user_signing", user_signing_key diff --git a/synapse/storage/schema/delta/56/hidden_devices.sql b/synapse/storage/schema/delta/56/hidden_devices.sql index e1cd8cc2c1..67f8b20297 100644 --- a/synapse/storage/schema/delta/56/hidden_devices.sql +++ b/synapse/storage/schema/delta/56/hidden_devices.sql @@ -13,47 +13,6 @@ * limitations under the License. 
*/ --- cross-signing keys -CREATE TABLE IF NOT EXISTS e2e_cross_signing_keys ( - user_id TEXT NOT NULL, - -- the type of cross-signing key (master, user_signing, or self_signing) - keytype TEXT NOT NULL, - -- the full key information, as a json-encoded dict - keydata TEXT NOT NULL, - -- time that the key was added - added_ts BIGINT NOT NULL -); - -CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, added_ts); - --- cross-signing signatures -CREATE TABLE IF NOT EXISTS e2e_cross_signing_signatures ( - -- user who did the signing - user_id TEXT NOT NULL, - -- key used to sign - key_id TEXT NOT NULL, - -- user who was signed - target_user_id TEXT NOT NULL, - -- device/key that was signed - target_device_id TEXT NOT NULL, - -- the actual signature - signature TEXT NOT NULL -); - -CREATE UNIQUE INDEX e2e_cross_signing_signatures_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id); - --- stream of user signature updates -CREATE TABLE IF NOT EXISTS user_signature_stream ( - -- uses the same stream ID as device list stream - stream_id BIGINT NOT NULL, - -- user who did the signing - from_user_id TEXT NOT NULL, - -- list of users who were signed, as a JSON array - user_ids TEXT NOT NULL -); - -CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id); - -- device list needs to know which ones are "real" devices, and which ones are -- just used to avoid collisions ALTER TABLE devices ADD COLUMN hidden BOOLEAN DEFAULT FALSE; diff --git a/synapse/storage/schema/delta/56/signing_keys.sql b/synapse/storage/schema/delta/56/signing_keys.sql new file mode 100644 index 0000000000..6a9ef1782e --- /dev/null +++ b/synapse/storage/schema/delta/56/signing_keys.sql @@ -0,0 +1,55 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- cross-signing keys +CREATE TABLE IF NOT EXISTS e2e_cross_signing_keys ( + user_id TEXT NOT NULL, + -- the type of cross-signing key (master, user_signing, or self_signing) + keytype TEXT NOT NULL, + -- the full key information, as a json-encoded dict + keydata TEXT NOT NULL, + -- time that the key was added + added_ts BIGINT NOT NULL +); + +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, added_ts); + +-- cross-signing signatures +CREATE TABLE IF NOT EXISTS e2e_cross_signing_signatures ( + -- user who did the signing + user_id TEXT NOT NULL, + -- key used to sign + key_id TEXT NOT NULL, + -- user who was signed + target_user_id TEXT NOT NULL, + -- device/key that was signed + target_device_id TEXT NOT NULL, + -- the actual signature + signature TEXT NOT NULL +); + +CREATE UNIQUE INDEX e2e_cross_signing_signatures_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id); + +-- stream of user signature updates +CREATE TABLE IF NOT EXISTS user_signature_stream ( + -- uses the same stream ID as device list stream + stream_id BIGINT NOT NULL, + -- user who did the signing + from_user_id TEXT NOT NULL, + -- list of users who were signed, as a JSON array + user_ids TEXT NOT NULL +); + +CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id); diff --git a/synapse/types.py b/synapse/types.py index 7a80471a0c..00bb0743ff 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -492,10 +492,10 @@ def get_verify_key_from_cross_signing_key(key_info): """ # make sure that exactly one key is provided if "keys" not in key_info: - raise SynapseError(400, "Invalid key") + raise ValueError("Invalid key") keys = key_info["keys"] if len(keys) != 1: - raise SynapseError(400, "Invalid key") + raise ValueError("Invalid key") # and return that one key for key_id, key_data in keys.items(): return (key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))) -- cgit 1.4.1 From d28d1e2d1b056a0c9e2b9f2c92013515a56dd9fb Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 1 Aug 2019 21:52:35 -0400 Subject: add changelog --- changelog.d/5769.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5769.feature diff --git a/changelog.d/5769.feature b/changelog.d/5769.feature new file mode 100644 index 0000000000..c34257cb8f --- /dev/null +++ b/changelog.d/5769.feature @@ -0,0 +1 @@ +allow uploading of cross-signing keys \ No newline at end of file -- cgit 1.4.1 From 8c9adcc95dee892f90d6acbbe5c54acbf621720b Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 1 Aug 2019 22:09:05 -0400 Subject: fix formatting --- changelog.d/5769.feature | 2 +- tests/handlers/test_e2e_keys.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/changelog.d/5769.feature b/changelog.d/5769.feature index c34257cb8f..bf994ca327 100644 --- a/changelog.d/5769.feature +++ b/changelog.d/5769.feature @@ -1 +1 @@ -allow uploading of cross-signing keys \ No newline at end of file +Allow uploading of cross-signing keys. 
\ No newline at end of file diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 9ae4cb6ea2..a62c52eefa 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -176,7 +176,9 @@ class E2eKeysHandlerTestCase(unittest.TestCase): } yield self.handler.upload_signing_keys_for_user(local_user, keys2) - devices = yield self.handler.query_devices({"device_keys": {local_user: []}}, 0, local_user) + devices = yield self.handler.query_devices( + {"device_keys": {local_user: []}}, 0, local_user + ) self.assertDictEqual(devices["master_keys"], {local_user: keys2["master_key"]}) @defer.inlineCallbacks -- cgit 1.4.1 From f63ba7a7955d077224d4d602cd33bb31fad92fbc Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Mon, 12 Aug 2019 15:14:37 -0700 Subject: Cross-signing [1/4] -- hidden devices (#5759) * allow devices to be marked as "hidden" This is a prerequisite for cross-signing, as it allows us to create other things that live within the device namespace, so they can be used for signatures. --- changelog.d/5759.misc | 1 + synapse/storage/devices.py | 38 +++++++++++++++++----- synapse/storage/end_to_end_keys.py | 2 +- synapse/storage/schema/delta/56/hidden_devices.sql | 18 ++++++++++ 4 files changed, 49 insertions(+), 10 deletions(-) create mode 100644 changelog.d/5759.misc create mode 100644 synapse/storage/schema/delta/56/hidden_devices.sql diff --git a/changelog.d/5759.misc b/changelog.d/5759.misc new file mode 100644 index 0000000000..c0bc566c4c --- /dev/null +++ b/changelog.d/5759.misc @@ -0,0 +1 @@ +Allow devices to be marked as hidden, for use by features such as cross-signing. \ No newline at end of file diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 8f72d92895..991e28ea24 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +22,7 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.errors import StoreError +from synapse.api.errors import Codes, StoreError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore @@ -36,7 +38,8 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( class DeviceWorkerStore(SQLBaseStore): def get_device(self, user_id, device_id): - """Retrieve a device. + """Retrieve a device. Only returns devices that are not marked as + hidden. Args: user_id (str): The ID of the user which owns the device @@ -48,14 +51,15 @@ class DeviceWorkerStore(SQLBaseStore): """ return self._simple_select_one( table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, + keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, retcols=("user_id", "device_id", "display_name"), desc="get_device", ) @defer.inlineCallbacks def get_devices_by_user(self, user_id): - """Retrieve all of a user's registered devices. + """Retrieve all of a user's registered devices. Only returns devices + that are not marked as hidden. 
Args: user_id (str): @@ -66,7 +70,7 @@ class DeviceWorkerStore(SQLBaseStore): """ devices = yield self._simple_select_list( table="devices", - keyvalues={"user_id": user_id}, + keyvalues={"user_id": user_id, "hidden": False}, retcols=("user_id", "device_id", "display_name"), desc="get_devices_by_user", ) @@ -540,6 +544,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): Returns: defer.Deferred: boolean whether the device was inserted or an existing device existed with that ID. + Raises: + StoreError: if the device is already in use """ key = (user_id, device_id) if self.device_id_exists_cache.get(key, None): @@ -552,12 +558,25 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "user_id": user_id, "device_id": device_id, "display_name": initial_device_display_name, + "hidden": False, }, desc="store_device", or_ignore=True, ) + if not inserted: + # if the device already exists, check if it's a real device, or + # if the device ID is reserved by something else + hidden = yield self._simple_select_one_onecol( + "devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + retcol="hidden", + ) + if hidden: + raise StoreError(400, "The device ID is in use", Codes.FORBIDDEN) self.device_id_exists_cache.prefill(key, True) return inserted + except StoreError: + raise except Exception as e: logger.error( "store_device with device_id=%s(%r) user_id=%s(%r)" @@ -584,7 +603,7 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): """ yield self._simple_delete_one( table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, + keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, desc="delete_device", ) @@ -604,14 +623,15 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): table="devices", column="device_id", iterable=device_ids, - keyvalues={"user_id": user_id}, + keyvalues={"user_id": user_id, "hidden": False}, desc="delete_devices", ) for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) def update_device(self, user_id, device_id, new_display_name=None): - """Update a device. + """Update a device. Only updates the device if it is not marked as + hidden. Args: user_id (str): The ID of the user which owns the device @@ -630,7 +650,7 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): return defer.succeed(None) return self._simple_update_one( table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, + keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, updatevalues=updates, desc="update_device", ) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 1e07474e70..6f524cedd9 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -85,7 +85,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): " k.key_json" " FROM devices d" " %s JOIN e2e_device_keys_json k USING (user_id, device_id)" - " WHERE %s" + " WHERE %s AND NOT d.hidden" ) % ( "LEFT" if include_all_devices else "INNER", " OR ".join("(" + q + ")" for q in query_clauses), diff --git a/synapse/storage/schema/delta/56/hidden_devices.sql b/synapse/storage/schema/delta/56/hidden_devices.sql new file mode 100644 index 0000000000..67f8b20297 --- /dev/null +++ b/synapse/storage/schema/delta/56/hidden_devices.sql @@ -0,0 +1,18 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- device list needs to know which ones are "real" devices, and which ones are +-- just used to avoid collisions +ALTER TABLE devices ADD COLUMN hidden BOOLEAN DEFAULT FALSE; -- cgit 1.4.1 From 7c3abc65728af052b0d484f9669b1c084cd2faf5 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 21 Aug 2019 13:19:35 -0700 Subject: apply PR review suggestions --- synapse/handlers/e2e_keys.py | 76 ++++++++++++++++-------------------- synapse/rest/client/v2_alpha/keys.py | 2 +- synapse/storage/devices.py | 6 +-- synapse/storage/end_to_end_keys.py | 15 +++---- 4 files changed, 46 insertions(+), 53 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 9081c3f64c..53ca8330ad 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -17,6 +17,8 @@ import logging +import time + from six import iteritems from canonicaljson import encode_canonical_json, json @@ -132,7 +134,7 @@ class E2eKeysHandler(object): r[user_id] = remote_queries[user_id] # Get cached cross-signing keys - cross_signing_keys = yield self.query_cross_signing_keys( + cross_signing_keys = yield self.get_cross_signing_keys_from_cache( device_keys_query, from_user_id ) @@ -200,11 +202,11 @@ class E2eKeysHandler(object): for user_id, key in remote_result["master_keys"].items(): if user_id in destination_query: - cross_signing_keys["master"][user_id] = key + cross_signing_keys["master_keys"][user_id] = key for user_id, key in remote_result["self_signing_keys"].items(): if user_id in destination_query: - cross_signing_keys["self_signing"][user_id] = key + cross_signing_keys["self_signing_keys"][user_id] = key except Exception as e: failure = _exception_to_failure(e) @@ -222,14 +224,13 @@ class E2eKeysHandler(object): ret = {"device_keys": results, "failures": failures} - for key, value in iteritems(cross_signing_keys): - ret[key + "_keys"] = value + ret.update(cross_signing_keys) return ret @defer.inlineCallbacks - def query_cross_signing_keys(self, query, from_user_id): - """Get cross-signing keys for users + def get_cross_signing_keys_from_cache(self, query, from_user_id): + """Get cross-signing keys for users from the database Args: query (Iterable[string]) an iterable of user IDs. A dict whose keys @@ -250,43 +251,32 @@ class E2eKeysHandler(object): for user_id in query: # XXX: consider changing the store functions to allow querying # multiple users simultaneously. 
- try: - key = yield self.store.get_e2e_cross_signing_key( - user_id, "master", from_user_id - ) - if key: - master_keys[user_id] = key - except Exception as e: - logger.info("Error getting master key: %s", e) + key = yield self.store.get_e2e_cross_signing_key( + user_id, "master", from_user_id + ) + if key: + master_keys[user_id] = key - try: - key = yield self.store.get_e2e_cross_signing_key( - user_id, "self_signing", from_user_id - ) - if key: - self_signing_keys[user_id] = key - except Exception as e: - logger.info("Error getting self-signing key: %s", e) + key = yield self.store.get_e2e_cross_signing_key( + user_id, "self_signing", from_user_id + ) + if key: + self_signing_keys[user_id] = key # users can see other users' master and self-signing keys, but can # only see their own user-signing keys if from_user_id == user_id: - try: - key = yield self.store.get_e2e_cross_signing_key( - user_id, "user_signing", from_user_id - ) - if key: - user_signing_keys[user_id] = key - except Exception as e: - logger.info("Error getting user-signing key: %s", e) + key = yield self.store.get_e2e_cross_signing_key( + user_id, "user_signing", from_user_id + ) + if key: + user_signing_keys[user_id] = key - defer.returnValue( - { - "master": master_keys, - "self_signing": self_signing_keys, - "user_signing": user_signing_keys, - } - ) + return { + "master_keys": master_keys, + "self_signing_keys": self_signing_keys, + "user_signing_keys": user_signing_keys, + } @defer.inlineCallbacks def query_local_devices(self, query): @@ -542,11 +532,13 @@ class E2eKeysHandler(object): # if everything checks out, then store the keys and send notifications deviceids = [] if "master_key" in keys: - yield self.store.set_e2e_cross_signing_key(user_id, "master", master_key) + yield self.store.set_e2e_cross_signing_key( + user_id, "master", master_key, time.time() * 1000 + ) deviceids.append(master_verify_key.version) if "self_signing_key" in keys: yield self.store.set_e2e_cross_signing_key( - user_id, "self_signing", self_signing_key + user_id, "self_signing", self_signing_key, time.time() * 1000 ) try: deviceids.append( @@ -556,7 +548,7 @@ class E2eKeysHandler(object): raise SynapseError(400, "Invalid self-signing key", Codes.INVALID_PARAM) if "user_signing_key" in keys: yield self.store.set_e2e_cross_signing_key( - user_id, "user_signing", user_signing_key + user_id, "user_signing", user_signing_key, time.time() * 1000 ) # the signature stream matches the semantics that we want for # user-signing key updates: only the user themselves is notified of @@ -568,7 +560,7 @@ class E2eKeysHandler(object): if len(deviceids): yield self.device_handler.notify_device_update(user_id, deviceids) - defer.returnValue({}) + return {} def _check_cross_signing_key(key, user_id, key_type, signing_key=None): diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index f40a785598..1340d2c80d 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -263,7 +263,7 @@ class SigningKeyUploadServlet(RestServlet): ) result = yield self.e2e_keys_handler.upload_signing_keys_for_user(user_id, body) - defer.returnValue((200, result)) + return (200, result) def register_servlets(hs, http_server): diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index da23a350a1..6a5572e001 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -317,7 +317,7 @@ class DeviceWorkerStore(SQLBaseStore): user_ids, stream_id, ) - defer.returnValue(stream_id) + 
return stream_id def _add_user_signature_change_txn(self, txn, from_user_id, user_ids, stream_id): txn.call_after( @@ -491,9 +491,9 @@ class DeviceWorkerStore(SQLBaseStore): rows = yield self._execute( "get_users_whose_signatures_changed", None, sql, user_id, from_key ) - defer.returnValue(set(user for row in rows for user in json.loads(row[0]))) + return set(user for row in rows for user in json.loads(row[0])) else: - defer.returnValue(set()) + return set() def get_all_device_list_changes_for_remotes(self, from_key, to_key): """Return a list of `(stream_id, user_id, destination)` which is the diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index de2d1bbb9f..b218b7b2e8 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import time - from six import iteritems from canonicaljson import encode_canonical_json, json @@ -284,7 +282,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): "delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn ) - def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key): + def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key, added_ts): """Set a user's cross-signing key. Args: @@ -294,6 +292,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): for a master key, 'self_signing' for a self-signing key, or 'user_signing' for a user-signing key key (dict): the key data + added_ts (int): the timestamp for when the key was added """ # the cross-signing keys need to occupy the same namespace as devices, # since signatures are identified by device ID. So add an entry to the @@ -334,18 +333,19 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): "user_id": user_id, "keytype": key_type, "keydata": json.dumps(key), - "added_ts": time.time() * 1000, + "added_ts": added_ts, }, desc="store_master_key", ) - def set_e2e_cross_signing_key(self, user_id, key_type, key): + def set_e2e_cross_signing_key(self, user_id, key_type, key, added_ts): """Set a user's cross-signing key. 
Args: user_id (str): the user to set the user-signing key for key_type (str): the type of cross-signing key to set key (dict): the key data + added_ts (int): the timestamp for when the key was added """ return self.runInteraction( "add_e2e_cross_signing_key", @@ -353,6 +353,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): user_id, key_type, key, + added_ts, ) def _get_e2e_cross_signing_key_txn(self, txn, user_id, key_type, from_user_id=None): @@ -368,7 +369,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): the key will be included in the result Returns: - dict of the key data + dict of the key data or None if not found """ sql = ( "SELECT keydata " @@ -412,7 +413,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): the self-signing key will be included in the result Returns: - dict of the key data + dict of the key data or None if not found """ return self.runInteraction( "get_e2e_cross_signing_key", -- cgit 1.4.1 From 814f253f1b102475fe0baace8b65e2281e7b6a89 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 21 Aug 2019 13:22:15 -0700 Subject: make isort happy --- synapse/handlers/e2e_keys.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 53ca8330ad..be15597ee8 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -16,7 +16,6 @@ # limitations under the License. import logging - import time from six import iteritems -- cgit 1.4.1 From 3b0b22cb059f7dfd1d7a7878fe391be38ee91d71 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 28 Aug 2019 17:17:21 -0700 Subject: use stream ID generator instead of timestamp --- synapse/handlers/e2e_keys.py | 7 +++--- synapse/storage/__init__.py | 3 +++ synapse/storage/end_to_end_keys.py | 30 +++++++++++------------- synapse/storage/schema/delta/56/signing_keys.sql | 6 ++--- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index be15597ee8..d2d9bef1fe 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -16,7 +16,6 @@ # limitations under the License. 
import logging -import time from six import iteritems @@ -532,12 +531,12 @@ class E2eKeysHandler(object): deviceids = [] if "master_key" in keys: yield self.store.set_e2e_cross_signing_key( - user_id, "master", master_key, time.time() * 1000 + user_id, "master", master_key ) deviceids.append(master_verify_key.version) if "self_signing_key" in keys: yield self.store.set_e2e_cross_signing_key( - user_id, "self_signing", self_signing_key, time.time() * 1000 + user_id, "self_signing", self_signing_key ) try: deviceids.append( @@ -547,7 +546,7 @@ class E2eKeysHandler(object): raise SynapseError(400, "Invalid self-signing key", Codes.INVALID_PARAM) if "user_signing_key" in keys: yield self.store.set_e2e_cross_signing_key( - user_id, "user_signing", user_signing_key, time.time() * 1000 + user_id, "user_signing", user_signing_key ) # the signature stream matches the semantics that we want for # user-signing key updates: only the user themselves is notified of diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0a64f90624..e9a9c2cd8d 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -136,6 +136,9 @@ class DataStore( self._device_list_id_gen = StreamIdGenerator( db_conn, "device_lists_stream", "stream_id" ) + self._cross_signing_id_gen = StreamIdGenerator( + db_conn, "e2e_cross_signing_keys", "stream_id" + ) self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index b218b7b2e8..4b37bffb0b 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -282,7 +282,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): "delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn ) - def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key, added_ts): + def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key): """Set a user's cross-signing key. Args: @@ -292,7 +292,6 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): for a master key, 'self_signing' for a self-signing key, or 'user_signing' for a user-signing key key (dict): the key data - added_ts (int): the timestamp for when the key was added """ # the cross-signing keys need to occupy the same namespace as devices, # since signatures are identified by device ID. So add an entry to the @@ -327,25 +326,25 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): ) # and finally, store the key itself - self._simple_insert( - "e2e_cross_signing_keys", - values={ - "user_id": user_id, - "keytype": key_type, - "keydata": json.dumps(key), - "added_ts": added_ts, - }, - desc="store_master_key", - ) + with self._cross_signing_id_gen.get_next() as stream_id: + self._simple_insert( + "e2e_cross_signing_keys", + values={ + "user_id": user_id, + "keytype": key_type, + "keydata": json.dumps(key), + "stream_id": stream_id, + }, + desc="store_master_key", + ) - def set_e2e_cross_signing_key(self, user_id, key_type, key, added_ts): + def set_e2e_cross_signing_key(self, user_id, key_type, key): """Set a user's cross-signing key. 
Args: user_id (str): the user to set the user-signing key for key_type (str): the type of cross-signing key to set key (dict): the key data - added_ts (int): the timestamp for when the key was added """ return self.runInteraction( "add_e2e_cross_signing_key", @@ -353,7 +352,6 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): user_id, key_type, key, - added_ts, ) def _get_e2e_cross_signing_key_txn(self, txn, user_id, key_type, from_user_id=None): @@ -374,7 +372,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): sql = ( "SELECT keydata " " FROM e2e_cross_signing_keys " - " WHERE user_id = ? AND keytype = ? ORDER BY added_ts DESC LIMIT 1" + " WHERE user_id = ? AND keytype = ? ORDER BY stream_id DESC LIMIT 1" ) txn.execute(sql, (user_id, key_type)) row = txn.fetchone() diff --git a/synapse/storage/schema/delta/56/signing_keys.sql b/synapse/storage/schema/delta/56/signing_keys.sql index 6a9ef1782e..27a96123e3 100644 --- a/synapse/storage/schema/delta/56/signing_keys.sql +++ b/synapse/storage/schema/delta/56/signing_keys.sql @@ -20,11 +20,11 @@ CREATE TABLE IF NOT EXISTS e2e_cross_signing_keys ( keytype TEXT NOT NULL, -- the full key information, as a json-encoded dict keydata TEXT NOT NULL, - -- time that the key was added - added_ts BIGINT NOT NULL + -- for keeping the keys in order, so that we can fetch the latest one + stream_id BIGINT NOT NULL ); -CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, added_ts); +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, stream_id); -- cross-signing signatures CREATE TABLE IF NOT EXISTS e2e_cross_signing_signatures ( -- cgit 1.4.1 From 96bda563701795537c39d56d82869d953a6bf167 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 28 Aug 2019 17:18:40 -0700 Subject: black --- synapse/handlers/e2e_keys.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index d2d9bef1fe..870810e6ea 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -530,9 +530,7 @@ class E2eKeysHandler(object): # if everything checks out, then store the keys and send notifications deviceids = [] if "master_key" in keys: - yield self.store.set_e2e_cross_signing_key( - user_id, "master", master_key - ) + yield self.store.set_e2e_cross_signing_key(user_id, "master", master_key) deviceids.append(master_verify_key.version) if "self_signing_key" in keys: yield self.store.set_e2e_cross_signing_key( -- cgit 1.4.1 From a4bf72c30c5953b721a64eae89db186fa8735bb3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 29 Aug 2019 17:38:51 +0100 Subject: Censor redactions in DB after a month --- synapse/storage/events.py | 88 +++++++++++++++++++++- .../storage/schema/delta/56/redaction_censor.sql | 17 +++++ tests/storage/test_redaction.py | 71 +++++++++++++++++ 3 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/56/redaction_censor.sql diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5a95c36a8b..2970da6829 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -23,7 +23,7 @@ from functools import wraps from six import iteritems, text_type from six.moves import range -from canonicaljson import json +from canonicaljson import encode_canonical_json, json from prometheus_client import Counter, Histogram from twisted.internet import defer @@ -33,6 +33,7 @@ from synapse.api.constants import EventTypes 
from synapse.api.errors import SynapseError from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.events.utils import prune_event_dict from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable from synapse.logging.utils import log_function from synapse.metrics import BucketCollector @@ -262,6 +263,13 @@ class EventsStore( hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000) + def _censor_redactions(): + return run_as_background_process( + "_censor_redactions", self._censor_redactions + ) + + hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) + @defer.inlineCallbacks def _read_forward_extremities(self): def fetch(txn): @@ -1548,6 +1556,84 @@ class EventsStore( (event.event_id, event.redacts), ) + @defer.inlineCallbacks + def _censor_redactions(self): + """Censors all redactions older than a month that haven't been censored. + + By censor we mean update the event_json table with the redacted event. + + Returns: + Deferred + """ + + if self.stream_ordering_month_ago is None: + return + + max_pos = self.stream_ordering_month_ago + + # We fetch all redactions that point to an event that we have that has + # a stream ordering from over a month ago, that we haven't yet censored + # in the DB. + sql = """ + SELECT er.event_id, redacts FROM redactions + INNER JOIN events AS er USING (event_id) + INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id) + WHERE NOT have_censored + AND ? <= er.stream_ordering AND er.stream_ordering <= ? + ORDER BY er.stream_ordering ASC + LIMIT ? + """ + + rows = yield self._execute( + "_censor_redactions_fetch", None, sql, -max_pos, max_pos, 100 + ) + + updates = [] + + for redaction_id, event_id in rows: + redaction_event = yield self.get_event(redaction_id, allow_none=True) + original_event = yield self.get_event( + event_id, allow_rejected=True, allow_none=True + ) + + # The SQL above ensures that we have both the redaction and + # original event, so if the `get_event` calls return None it + # means that the redaction wasn't allowed. Either way we know that + # the result won't change so we mark the fact that we've checked. + if ( + redaction_event + and original_event + and original_event.internal_metadata.is_redacted() + ): + # Redaction was allowed + pruned_json = encode_canonical_json( + prune_event_dict(original_event.get_dict()) + ) + else: + # Redaction wasn't allowed + pruned_json = None + + updates.append((redaction_id, event_id, pruned_json)) + + def _update_censor_txn(txn): + for redaction_id, event_id, pruned_json in updates: + if pruned_json: + self._simple_update_one_txn( + txn, + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": pruned_json}, + ) + + self._simple_update_one_txn( + txn, + table="redactions", + keyvalues={"event_id": redaction_id}, + updatevalues={"have_censored": True}, + ) + + yield self.runInteraction("_update_censor_txn", _update_censor_txn) + @defer.inlineCallbacks def count_daily_messages(self): """ diff --git a/synapse/storage/schema/delta/56/redaction_censor.sql b/synapse/storage/schema/delta/56/redaction_censor.sql new file mode 100644 index 0000000000..fe51b02309 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE redactions ADD COLUMN have_censored BOOL NOT NULL DEFAULT false; +CREATE INDEX redactions_have_censored ON redactions(event_id) WHERE not have_censored; diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index d961b81d48..0c9f3c7071 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -17,6 +17,8 @@ from mock import Mock +from canonicaljson import json + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -286,3 +288,72 @@ class RedactionTestCase(unittest.HomeserverTestCase): self.assertEqual( fetched.unsigned["redacted_because"].event_id, redaction_event_id2 ) + + def test_redact_censor(self): + """Test that a redacted event gets censored in the DB after a month + """ + + self.get_success( + self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) + ) + + msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t")) + + # Check event has not been redacted: + event = self.get_success(self.store.get_event(msg_event.event_id)) + + self.assertObjectHasAttributes( + { + "type": EventTypes.Message, + "user_id": self.u_alice.to_string(), + "content": {"body": "t", "msgtype": "message"}, + }, + event, + ) + + self.assertFalse("redacted_because" in event.unsigned) + + # Redact event + reason = "Because I said so" + self.get_success( + self.inject_redaction(self.room1, msg_event.event_id, self.u_alice, reason) + ) + + event = self.get_success(self.store.get_event(msg_event.event_id)) + + self.assertTrue("redacted_because" in event.unsigned) + + self.assertObjectHasAttributes( + { + "type": EventTypes.Message, + "user_id": self.u_alice.to_string(), + "content": {}, + }, + event, + ) + + event_json = self.get_success( + self.store._simple_select_one_onecol( + table="event_json", + keyvalues={"event_id": msg_event.event_id}, + retcol="json", + ) + ) + + self.assert_dict( + {"content": {"body": "t", "msgtype": "message"}}, json.loads(event_json) + ) + + # Advance by 30 days + self.reactor.advance(60 * 60 * 24 * 31) + self.reactor.advance(60 * 60 * 2) + + event_json = self.get_success( + self.store._simple_select_one_onecol( + table="event_json", + keyvalues={"event_id": msg_event.event_id}, + retcol="json", + ) + ) + + self.assert_dict({"content": {}}, json.loads(event_json)) -- cgit 1.4.1 From 549f974897ddf2fb0e5dc571c3da8034a1eb6510 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Aug 2019 09:51:33 +0100 Subject: Newsfile --- changelog.d/5934.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5934.feature diff --git a/changelog.d/5934.feature b/changelog.d/5934.feature new file mode 100644 index 0000000000..eae969a52a --- /dev/null +++ b/changelog.d/5934.feature @@ -0,0 +1 @@ +Redact events in the database that have been redacted for a month. 
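The censoring pass above amounts to two steps per eligible redaction: prune the original event down to the keys the redaction algorithm protects, then overwrite the stored `event_json` row with that pruned form. (Both bounds in the fetch query are needed — `-max_pos` as well as `max_pos` — because backfilled events are assigned negative stream orderings.) A minimal sketch of the pruning step, using only the two helpers the patch itself imports; the function name here is illustrative, not part of the patch:

```python
from canonicaljson import encode_canonical_json
from synapse.events.utils import prune_event_dict

def censored_json(original_event_dict):
    # prune_event_dict applies the redaction algorithm to the event dict,
    # stripping the body and most other content while keeping required
    # fields; encoding it canonically matches how _censor_redactions
    # writes the result back into the event_json table.
    return encode_canonical_json(prune_event_dict(original_event_dict))
```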
-- cgit 1.4.1 From cee00a3584970c8f00f86d265df392adb8f216d7 Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 2 Sep 2019 05:27:39 -0500 Subject: Update INSTALL.md to say that Python 2 is no longer supported (#5953) Signed-off-by: Aaron Raimist --- INSTALL.md | 4 ++-- changelog.d/5953.misc | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5953.misc diff --git a/INSTALL.md b/INSTALL.md index 5728882460..6bce370ea8 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -36,7 +36,7 @@ that your email address is probably `user@example.com` rather than System requirements: - POSIX-compliant system (tested on Linux & OS X) -- Python 3.5, 3.6, 3.7, or 2.7 +- Python 3.5, 3.6, or 3.7 - At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org Synapse is written in Python but some of the libraries it uses are written in @@ -421,7 +421,7 @@ If Synapse is not configured with an SMTP server, password reset via email will The easiest way to create a new user is to do so from a client like [Riot](https://riot.im). -Alternatively you can do so from the command line if you have installed via pip. +Alternatively you can do so from the command line if you have installed via pip. This can be done as follows: diff --git a/changelog.d/5953.misc b/changelog.d/5953.misc new file mode 100644 index 0000000000..38e885f42a --- /dev/null +++ b/changelog.d/5953.misc @@ -0,0 +1 @@ +Update INSTALL.md to say that Python 2 is no longer supported. -- cgit 1.4.1 From ce7803b8b030f7359dd1c394d1c874dbfdd79f36 Mon Sep 17 00:00:00 2001 From: L0ric0 Date: Mon, 2 Sep 2019 13:18:41 +0200 Subject: fix thumbnail storage location (#5915) * fix thumbnail storage location Signed-off-by: Lorenz Steinert * Add changelog file. Signed-off-by: Lorenz Steinert * Update Changelog Signed-off-by: Lorenz Steinert --- changelog.d/5915.bugfix | 1 + synapse/rest/media/v1/media_repository.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5915.bugfix diff --git a/changelog.d/5915.bugfix b/changelog.d/5915.bugfix new file mode 100644 index 0000000000..bf5b99fedc --- /dev/null +++ b/changelog.d/5915.bugfix @@ -0,0 +1 @@ +Fix 404 for thumbnail download when `dynamic_thumbnails` is `false` and the thumbnail was dynamically generated. Fix reported by rkfg. diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d4ea09260c..b972e152a9 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -526,7 +526,7 @@ class MediaRepository(object): try: file_info = FileInfo( server_name=server_name, - file_id=media_id, + file_id=file_id, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, -- cgit 1.4.1 From 36f34e6f3d551ac7f1bcd92771502d8e37722c33 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 2 Sep 2019 18:29:21 +0100 Subject: Remove unused methods from c/s api v1 in register.py (#5963) These methods were part of the v1 C/S API. Remove them as they are no longer used by any code paths. 
--- changelog.d/5963.misc | 1 + synapse/handlers/register.py | 104 ------------------------------------------- synapse/http/client.py | 34 +------------- 3 files changed, 2 insertions(+), 137 deletions(-) create mode 100644 changelog.d/5963.misc diff --git a/changelog.d/5963.misc b/changelog.d/5963.misc new file mode 100644 index 0000000000..0d6c3c3d65 --- /dev/null +++ b/changelog.d/5963.misc @@ -0,0 +1 @@ +Remove left-over methods from C/S registration API. \ No newline at end of file diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index be0425a33b..3142d85788 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,13 +24,11 @@ from synapse.api.errors import ( AuthError, Codes, ConsentNotGivenError, - InvalidCaptchaError, LimitExceededError, RegistrationError, SynapseError, ) from synapse.config.server import is_threepid_reserved -from synapse.http.client import CaptchaServerHttpClient from synapse.http.servlet import assert_params_in_dict from synapse.replication.http.login import RegisterDeviceReplicationServlet from synapse.replication.http.register import ( @@ -39,7 +37,6 @@ from synapse.replication.http.register import ( ) from synapse.types import RoomAlias, RoomID, UserID, create_requester from synapse.util.async_helpers import Linearizer -from synapse.util.threepids import check_3pid_allowed from ._base import BaseHandler @@ -59,7 +56,6 @@ class RegistrationHandler(BaseHandler): self._auth_handler = hs.get_auth_handler() self.profile_handler = hs.get_profile_handler() self.user_directory_handler = hs.get_user_directory_handler() - self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler self.ratelimiter = hs.get_registration_ratelimiter() @@ -362,70 +358,6 @@ class RegistrationHandler(BaseHandler): ) return user_id - @defer.inlineCallbacks - def check_recaptcha(self, ip, private_key, challenge, response): - """ - Checks a recaptcha is correct. - - Used only by c/s api v1 - """ - - captcha_response = yield self._validate_captcha( - ip, private_key, challenge, response - ) - if not captcha_response["valid"]: - logger.info( - "Invalid captcha entered from %s. Error: %s", - ip, - captcha_response["error_url"], - ) - raise InvalidCaptchaError(error_url=captcha_response["error_url"]) - else: - logger.info("Valid captcha entered from %s", ip) - - @defer.inlineCallbacks - def register_email(self, threepidCreds): - """ - Registers emails with an identity server. - - Used only by c/s api v1 - """ - - for c in threepidCreds: - logger.info( - "validating threepidcred sid %s on id server %s", - c["sid"], - c["idServer"], - ) - try: - threepid = yield self.identity_handler.threepid_from_creds(c) - except Exception: - logger.exception("Couldn't validate 3pid") - raise RegistrationError(400, "Couldn't validate 3pid") - - if not threepid: - raise RegistrationError(400, "Couldn't validate 3pid") - logger.info( - "got threepid with medium '%s' and address '%s'", - threepid["medium"], - threepid["address"], - ) - - if not check_3pid_allowed(self.hs, threepid["medium"], threepid["address"]): - raise RegistrationError(403, "Third party identifier is not allowed") - - @defer.inlineCallbacks - def bind_emails(self, user_id, threepidCreds): - """Links emails with a user ID and informs an identity server. - - Used only by c/s api v1 - """ - - # Now we have a matrix ID, bind it to the threepids we were given - for c in threepidCreds: - # XXX: This should be a deferred list, shouldn't it? 
- yield self.identity_handler.bind_threepid(c, user_id) - def check_user_id_not_appservice_exclusive(self, user_id, allowed_appservice=None): # don't allow people to register the server notices mxid if self._server_notices_mxid is not None: @@ -463,42 +395,6 @@ class RegistrationHandler(BaseHandler): self._next_generated_user_id += 1 return str(id) - @defer.inlineCallbacks - def _validate_captcha(self, ip_addr, private_key, challenge, response): - """Validates the captcha provided. - - Used only by c/s api v1 - - Returns: - dict: Containing 'valid'(bool) and 'error_url'(str) if invalid. - - """ - response = yield self._submit_captcha(ip_addr, private_key, challenge, response) - # parse Google's response. Lovely format.. - lines = response.split("\n") - json = { - "valid": lines[0] == "true", - "error_url": "http://www.recaptcha.net/recaptcha/api/challenge?" - + "error=%s" % lines[1], - } - return json - - @defer.inlineCallbacks - def _submit_captcha(self, ip_addr, private_key, challenge, response): - """ - Used only by c/s api v1 - """ - data = yield self.captcha_client.post_urlencoded_get_raw( - "http://www.recaptcha.net:80/recaptcha/api/verify", - args={ - "privatekey": private_key, - "remoteip": ip_addr, - "challenge": challenge, - "response": response, - }, - ) - return data - @defer.inlineCallbacks def _join_user_to_room(self, requester, room_identifier): room_id = None diff --git a/synapse/http/client.py b/synapse/http/client.py index 0ac20ebefc..0ae6db8ea7 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -35,7 +35,7 @@ from twisted.internet.interfaces import ( ) from twisted.python.failure import Failure from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, HTTPConnectionPool, PartialDownloadError, readBody +from twisted.web.client import Agent, HTTPConnectionPool, readBody from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers @@ -599,38 +599,6 @@ def _readBodyToFile(response, stream, max_size): return d -class CaptchaServerHttpClient(SimpleHttpClient): - """ - Separate HTTP client for talking to google's captcha servers - Only slightly special because accepts partial download responses - - used only by c/s api v1 - """ - - @defer.inlineCallbacks - def post_urlencoded_get_raw(self, url, args={}): - query_bytes = urllib.parse.urlencode(encode_urlencode_args(args), True) - - response = yield self.request( - "POST", - url, - data=query_bytes, - headers=Headers( - { - b"Content-Type": [b"application/x-www-form-urlencoded"], - b"User-Agent": [self.user_agent], - } - ), - ) - - try: - body = yield make_deferred_yieldable(readBody(response)) - return body - except PartialDownloadError as e: - # twisted dislikes google's response, no content length. - return e.response - - def encode_urlencode_args(args): return {k: encode_urlencode_arg(v) for k, v in args.items()} -- cgit 1.4.1 From a90d16dabc6f498136a098568b1d37858d4af5b6 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Tue, 3 Sep 2019 10:21:30 +0100 Subject: Opentrace device lists (#5853) Trace device list changes. 
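The decorator API this patch converges on (documented in the updated module docstring below) supports both a bare and a named form. A minimal usage sketch — the first function name is illustrative, the second is taken from the module docstring itself:

```python
from synapse.logging.opentracing import trace

@trace
def process_device_update(origin, edu_content):
    # The span's operation name defaults to the function's own name,
    # i.e. "process_device_update".
    return edu_content

@trace(opname="a_better_operation_name")
def interesting_badly_named_function(*args, **kwargs):
    # The span is reported under the explicit operation name instead,
    # replacing the old trace_using_operation_name decorator.
    return args
```

Either form degrades to a no-op when the opentracing library is not available, since `trace` then returns the function unchanged.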
--- changelog.d/5853.feature | 1 + synapse/handlers/device.py | 65 +++++++++++++++++++++++++- synapse/handlers/devicemessage.py | 6 ++- synapse/logging/opentracing.py | 70 +++++++--------------------- synapse/rest/client/v2_alpha/keys.py | 4 +- synapse/rest/client/v2_alpha/sendtodevice.py | 4 ++ synapse/storage/deviceinbox.py | 21 +++++++++ synapse/storage/devices.py | 5 ++ 8 files changed, 118 insertions(+), 58 deletions(-) create mode 100644 changelog.d/5853.feature diff --git a/changelog.d/5853.feature b/changelog.d/5853.feature new file mode 100644 index 0000000000..80a04ae2ee --- /dev/null +++ b/changelog.d/5853.feature @@ -0,0 +1 @@ +Opentracing for device list updates. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 5c1cf83c9d..71a8f33da3 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -25,6 +25,7 @@ from synapse.api.errors import ( HttpResponseException, RequestSendFailed, ) +from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util import stringutils from synapse.util.async_helpers import Linearizer @@ -45,6 +46,7 @@ class DeviceWorkerHandler(BaseHandler): self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() + @trace @defer.inlineCallbacks def get_devices_by_user(self, user_id): """ @@ -56,6 +58,7 @@ class DeviceWorkerHandler(BaseHandler): defer.Deferred: list[dict[str, X]]: info on each device """ + set_tag("user_id", user_id) device_map = yield self.store.get_devices_by_user(user_id) ips = yield self.store.get_last_client_ip_by_device(user_id, device_id=None) @@ -64,8 +67,10 @@ class DeviceWorkerHandler(BaseHandler): for device in devices: _update_device_from_client_ips(device, ips) + log_kv(device_map) return devices + @trace @defer.inlineCallbacks def get_device(self, user_id, device_id): """ Retrieve the given device @@ -85,9 +90,14 @@ class DeviceWorkerHandler(BaseHandler): raise errors.NotFoundError ips = yield self.store.get_last_client_ip_by_device(user_id, device_id) _update_device_from_client_ips(device, ips) + + set_tag("device", device) + set_tag("ips", ips) + return device @measure_func("device.get_user_ids_changed") + @trace @defer.inlineCallbacks def get_user_ids_changed(self, user_id, from_token): """Get list of users that have had the devices updated, or have newly @@ -97,6 +107,9 @@ class DeviceWorkerHandler(BaseHandler): user_id (str) from_token (StreamToken) """ + + set_tag("user_id", user_id) + set_tag("from_token", from_token) now_room_key = yield self.store.get_room_events_max_id() room_ids = yield self.store.get_rooms_for_user(user_id) @@ -148,6 +161,9 @@ class DeviceWorkerHandler(BaseHandler): # special-case for an empty prev state: include all members # in the changed list if not event_ids: + log_kv( + {"event": "encountered empty previous state", "room_id": room_id} + ) for key, event_id in iteritems(current_state_ids): etype, state_key = key if etype != EventTypes.Member: @@ -200,7 +216,11 @@ class DeviceWorkerHandler(BaseHandler): possibly_joined = [] possibly_left = [] - return {"changed": list(possibly_joined), "left": list(possibly_left)} + result = {"changed": list(possibly_joined), "left": list(possibly_left)} + + log_kv(result) + + return result class DeviceHandler(DeviceWorkerHandler): @@ -267,6 +287,7 @@ class DeviceHandler(DeviceWorkerHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") + @trace @defer.inlineCallbacks def delete_device(self, user_id, 
device_id): """ Delete the given device @@ -284,6 +305,10 @@ class DeviceHandler(DeviceWorkerHandler): except errors.StoreError as e: if e.code == 404: # no match + set_tag("error", True) + log_kv( + {"reason": "User doesn't have device id.", "device_id": device_id} + ) pass else: raise @@ -296,6 +321,7 @@ class DeviceHandler(DeviceWorkerHandler): yield self.notify_device_update(user_id, [device_id]) + @trace @defer.inlineCallbacks def delete_all_devices_for_user(self, user_id, except_device_id=None): """Delete all of the user's devices @@ -331,6 +357,8 @@ class DeviceHandler(DeviceWorkerHandler): except errors.StoreError as e: if e.code == 404: # no match + set_tag("error", True) + set_tag("reason", "User doesn't have that device id.") pass else: raise @@ -371,6 +399,7 @@ class DeviceHandler(DeviceWorkerHandler): else: raise + @trace @measure_func("notify_device_update") @defer.inlineCallbacks def notify_device_update(self, user_id, device_ids): @@ -386,6 +415,8 @@ class DeviceHandler(DeviceWorkerHandler): hosts.update(get_domain_from_id(u) for u in users_who_share_room) hosts.discard(self.server_name) + set_tag("target_hosts", hosts) + position = yield self.store.add_device_change_to_streams( user_id, device_ids, list(hosts) ) @@ -405,6 +436,7 @@ class DeviceHandler(DeviceWorkerHandler): ) for host in hosts: self.federation_sender.send_device_messages(host) + log_kv({"message": "sent device update to host", "host": host}) @defer.inlineCallbacks def on_federation_query_user_devices(self, user_id): @@ -451,12 +483,15 @@ class DeviceListUpdater(object): iterable=True, ) + @trace @defer.inlineCallbacks def incoming_device_list_update(self, origin, edu_content): """Called on incoming device list update from federation. Responsible for parsing the EDU and adding to pending updates list. """ + set_tag("origin", origin) + set_tag("edu_content", edu_content) user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints @@ -471,12 +506,30 @@ class DeviceListUpdater(object): device_id, origin, ) + + set_tag("error", True) + log_kv( + { + "message": "Got a device list update edu from a user and " + "device which does not match the origin of the request.", + "user_id": user_id, + "device_id": device_id, + } + ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. + set_tag("error", True) + log_kv( + { + "message": "Got an update from a user for which " + "we don't share any rooms", + "other user_id": user_id, + } + ) logger.warning( "Got device list update edu for %r/%r, but don't share a room", user_id, @@ -578,6 +631,7 @@ class DeviceListUpdater(object): request: https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid """ + log_kv({"message": "Doing resync to update device list."}) # Fetch all devices for the user. origin = get_domain_from_id(user_id) try: @@ -594,13 +648,20 @@ class DeviceListUpdater(object): # eventually become consistent. 
return except FederationDeniedError as e: + set_tag("error", True) + log_kv({"reason": "FederationDeniedError"}) logger.info(e) return - except Exception: + except Exception as e: # TODO: Remember that we are now out of sync and try again # later + set_tag("error", True) + log_kv( + {"message": "Exception raised by federation request", "exception": e} + ) logger.exception("Failed to handle device list update for %s", user_id) return + log_kv({"result": result}) stream_id = result["stream_id"] devices = result["devices"] diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index c7d56779b8..01731cb2d0 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -22,6 +22,7 @@ from twisted.internet import defer from synapse.api.errors import SynapseError from synapse.logging.opentracing import ( get_active_span_text_map, + log_kv, set_tag, start_active_span, whitelisted_homeserver, @@ -86,7 +87,8 @@ class DeviceMessageHandler(object): @defer.inlineCallbacks def send_device_message(self, sender_user_id, message_type, messages): - + set_tag("number_of_messages", len(messages)) + set_tag("sender", sender_user_id) local_messages = {} remote_messages = {} for user_id, by_device in messages.items(): @@ -124,6 +126,7 @@ class DeviceMessageHandler(object): else None, } + log_kv({"local_messages": local_messages}) stream_id = yield self.store.add_messages_to_device_inbox( local_messages, remote_edu_contents ) @@ -132,6 +135,7 @@ class DeviceMessageHandler(object): "to_device_key", stream_id, users=local_messages.keys() ) + log_kv({"remote_messages": remote_messages}) for destination in remote_messages.keys(): # Enqueue a new federation transaction to send the new # device messages to each remote destination. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index dd296027a1..256b972aaa 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -85,14 +85,14 @@ the function becomes the operation name for the span. return something_usual_and_useful -Operation names can be explicitly set for functions by using -``trace_using_operation_name`` +Operation names can be explicitly set for a function by passing the +operation name to ``trace`` .. code-block:: python - from synapse.logging.opentracing import trace_using_operation_name + from synapse.logging.opentracing import trace - @trace_using_operation_name("A *much* better operation name") + @trace(opname="a_better_operation_name") def interesting_badly_named_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful @@ -641,66 +641,26 @@ def extract_text_map(carrier): # Tracing decorators -def trace(func): +def trace(func=None, opname=None): """ Decorator to trace a function. - Sets the operation name to that of the function's. + Sets the operation name to that of the function's or that given + as operation_name. See the module's doc string for usage + examples. 
""" - if opentracing is None: - return func - @wraps(func) - def _trace_inner(self, *args, **kwargs): - if opentracing is None: - return func(self, *args, **kwargs) - - scope = start_active_span(func.__name__) - scope.__enter__() - - try: - result = func(self, *args, **kwargs) - if isinstance(result, defer.Deferred): - - def call_back(result): - scope.__exit__(None, None, None) - return result - - def err_back(result): - scope.span.set_tag(tags.ERROR, True) - scope.__exit__(None, None, None) - return result - - result.addCallbacks(call_back, err_back) - - else: - scope.__exit__(None, None, None) - - return result - - except Exception as e: - scope.__exit__(type(e), None, e.__traceback__) - raise - - return _trace_inner - - -def trace_using_operation_name(operation_name): - """Decorator to trace a function. Explicitely sets the operation_name.""" - - def trace(func): - """ - Decorator to trace a function. - Sets the operation name to that of the function's. - """ + def decorator(func): if opentracing is None: return func + _opname = opname if opname else func.__name__ + @wraps(func) def _trace_inner(self, *args, **kwargs): if opentracing is None: return func(self, *args, **kwargs) - scope = start_active_span(operation_name) + scope = start_active_span(_opname) scope.__enter__() try: @@ -717,6 +677,7 @@ def trace_using_operation_name(operation_name): return result result.addCallbacks(call_back, err_back) + else: scope.__exit__(None, None, None) @@ -728,7 +689,10 @@ def trace_using_operation_name(operation_name): return _trace_inner - return trace + if func: + return decorator(func) + else: + return decorator def tag_args(func): diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 64b6898eb8..2e680134a0 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -24,7 +24,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) -from synapse.logging.opentracing import log_kv, set_tag, trace_using_operation_name +from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.types import StreamToken from ._base import client_patterns @@ -69,7 +69,7 @@ class KeyUploadServlet(RestServlet): self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() - @trace_using_operation_name("upload_keys") + @trace(opname="upload_keys") @defer.inlineCallbacks def on_POST(self, request, device_id): requester = yield self.auth.get_user_by_req(request, allow_guest=True) diff --git a/synapse/rest/client/v2_alpha/sendtodevice.py b/synapse/rest/client/v2_alpha/sendtodevice.py index 2613648d82..d90e52ed1a 100644 --- a/synapse/rest/client/v2_alpha/sendtodevice.py +++ b/synapse/rest/client/v2_alpha/sendtodevice.py @@ -19,6 +19,7 @@ from twisted.internet import defer from synapse.http import servlet from synapse.http.servlet import parse_json_object_from_request +from synapse.logging.opentracing import set_tag, trace from synapse.rest.client.transactions import HttpTransactionCache from ._base import client_patterns @@ -42,7 +43,10 @@ class SendToDeviceRestServlet(servlet.RestServlet): self.txns = HttpTransactionCache(hs) self.device_message_handler = hs.get_device_message_handler() + @trace(opname="sendToDevice") def on_PUT(self, request, message_type, txn_id): + set_tag("message_type", message_type) + set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( request, self._put, request, message_type, txn_id ) diff --git a/synapse/storage/deviceinbox.py 
b/synapse/storage/deviceinbox.py index 4dca9de617..6b7458304e 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -19,6 +19,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.storage._base import SQLBaseStore from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.expiringcache import ExpiringCache @@ -72,6 +73,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): "get_new_messages_for_device", get_new_messages_for_device_txn ) + @trace @defer.inlineCallbacks def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): """ @@ -87,11 +89,15 @@ class DeviceInboxWorkerStore(SQLBaseStore): last_deleted_stream_id = self._last_device_delete_cache.get( (user_id, device_id), None ) + + set_tag("last_deleted_stream_id", last_deleted_stream_id) + if last_deleted_stream_id: has_changed = self._device_inbox_stream_cache.has_entity_changed( user_id, last_deleted_stream_id ) if not has_changed: + log_kv({"message": "No changes in cache since last check"}) return 0 def delete_messages_for_device_txn(txn): @@ -107,6 +113,10 @@ class DeviceInboxWorkerStore(SQLBaseStore): "delete_messages_for_device", delete_messages_for_device_txn ) + log_kv( + {"message": "deleted {} messages for device".format(count), "count": count} + ) + # Update the cache, ensuring that we only ever increase the value last_deleted_stream_id = self._last_device_delete_cache.get( (user_id, device_id), 0 @@ -117,6 +127,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): return count + @trace def get_new_device_msgs_for_remote( self, destination, last_stream_id, current_stream_id, limit ): @@ -132,16 +143,23 @@ class DeviceInboxWorkerStore(SQLBaseStore): in the stream the messages got to. """ + set_tag("destination", destination) + set_tag("last_stream_id", last_stream_id) + set_tag("current_stream_id", current_stream_id) + set_tag("limit", limit) + has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( destination, last_stream_id ) if not has_changed or last_stream_id == current_stream_id: + log_kv({"message": "No new messages in stream"}) return defer.succeed(([], current_stream_id)) if limit <= 0: # This can happen if we run out of room for EDUs in the transaction. return defer.succeed(([], last_stream_id)) + @trace def get_new_messages_for_remote_destination_txn(txn): sql = ( "SELECT stream_id, messages_json FROM device_federation_outbox" @@ -156,6 +174,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): stream_pos = row[0] messages.append(json.loads(row[1])) if len(messages) < limit: + log_kv({"message": "Set stream position to current position"}) stream_pos = current_stream_id return messages, stream_pos @@ -164,6 +183,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): get_new_messages_for_remote_destination_txn, ) + @trace def delete_device_msgs_for_remote(self, destination, up_to_stream_id): """Used to delete messages when the remote destination acknowledges their receipt. 
@@ -214,6 +234,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): expiry_ms=30 * 60 * 1000, ) + @trace @defer.inlineCallbacks def add_messages_to_device_inbox( self, local_messages_by_user_then_device, remote_messages_by_destination diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 76542c512d..41f62828bd 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -23,6 +23,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.logging.opentracing import ( get_active_span_text_map, + set_tag, trace, whitelisted_homeserver, ) @@ -321,6 +322,7 @@ class DeviceWorkerStore(SQLBaseStore): def get_device_stream_token(self): return self._device_list_id_gen.get_current_token() + @trace @defer.inlineCallbacks def get_user_devices_from_cache(self, query_list): """Get the devices (and keys if any) for remote users from the cache. @@ -352,6 +354,9 @@ class DeviceWorkerStore(SQLBaseStore): else: results[user_id] = yield self._get_cached_devices_for_user(user_id) + set_tag("in_cache", results) + set_tag("not_in_cache", user_ids_not_in_cache) + return user_ids_not_in_cache, results @cachedInlineCallbacks(num_args=2, tree=True) -- cgit 1.4.1 From 2a447826665a5ac8e12736214f0ef2401e72f1f9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 3 Sep 2019 11:42:45 +0100 Subject: Remove double return statements (#5962) Remove all the "double return" statements which were a result of us removing all the instances of ``` defer.returnValue(...) return ``` statements when we switched to python3 fully. --- changelog.d/5962.misc | 1 + synapse/api/auth.py | 1 - synapse/appservice/api.py | 3 --- synapse/handlers/appservice.py | 2 -- synapse/handlers/events.py | 1 - synapse/handlers/initial_sync.py | 2 -- synapse/handlers/room.py | 1 - synapse/handlers/sync.py | 1 - synapse/rest/client/v1/room.py | 1 - synapse/rest/client/v2_alpha/register.py | 2 -- synapse/rest/media/v1/preview_url_resource.py | 1 - synapse/state/__init__.py | 1 - synapse/storage/appservice.py | 1 - synapse/storage/directory.py | 2 -- synapse/storage/profile.py | 1 - 15 files changed, 1 insertion(+), 20 deletions(-) create mode 100644 changelog.d/5962.misc diff --git a/changelog.d/5962.misc b/changelog.d/5962.misc new file mode 100644 index 0000000000..d97d376c36 --- /dev/null +++ b/changelog.d/5962.misc @@ -0,0 +1 @@ +Remove unnecessary return statements in the codebase which were the result of a regex run. 
\ No newline at end of file diff --git a/synapse/api/auth.py b/synapse/api/auth.py index fd3cdf50b0..ddc195bc32 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -704,7 +704,6 @@ class Auth(object): and visibility.content["history_visibility"] == "world_readable" ): return Membership.JOIN, None - return raise AuthError( 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN ) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 007ca75a94..3e25bf5747 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -107,7 +107,6 @@ class ApplicationServiceApi(SimpleHttpClient): except CodeMessageException as e: if e.code == 404: return False - return logger.warning("query_user to %s received %s", uri, e.code) except Exception as ex: logger.warning("query_user to %s threw exception %s", uri, ex) @@ -127,7 +126,6 @@ class ApplicationServiceApi(SimpleHttpClient): logger.warning("query_alias to %s received %s", uri, e.code) if e.code == 404: return False - return except Exception as ex: logger.warning("query_alias to %s threw exception %s", uri, ex) return False @@ -230,7 +228,6 @@ class ApplicationServiceApi(SimpleHttpClient): sent_transactions_counter.labels(service.id).inc() sent_events_counter.labels(service.id).inc(len(events)) return True - return except CodeMessageException as e: logger.warning("push_bulk to %s received %s", uri, e.code) except Exception as ex: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index d1a51df6f9..3e9b298154 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -294,12 +294,10 @@ class ApplicationServicesHandler(object): # we don't know if they are unknown or not since it isn't one of our # users. We can't poke ASes. return False - return user_info = yield self.store.get_user_by_id(user_id) if user_info: return False - return # user not found; could be the AS though, so check. services = self.store.get_app_services() diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 2f1f10a9af..5e748687e3 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -167,7 +167,6 @@ class EventHandler(BaseHandler): if not event: return None - return users = yield self.store.get_users_in_room(event.room_id) is_peeking = user.to_string() not in users diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 595f75400b..f991efeee3 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -450,7 +450,6 @@ class InitialSyncHandler(BaseHandler): # else it will throw. 
member_event = yield self.auth.check_user_was_in_room(room_id, user_id) return member_event.membership, member_event.event_id - return except AuthError: visibility = yield self.state_handler.get_current_state( room_id, EventTypes.RoomHistoryVisibility, "" @@ -460,7 +459,6 @@ class InitialSyncHandler(BaseHandler): and visibility.content["history_visibility"] == "world_readable" ): return Membership.JOIN, None - return raise AuthError( 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6e47fe7867..a509e11d69 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -852,7 +852,6 @@ class RoomContextHandler(object): ) if not event: return None - return filtered = yield (filter_evts([event])) if not filtered: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index d582f8e494..19bca6717f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -578,7 +578,6 @@ class SyncHandler(object): if not last_events: return None - return last_event = last_events[-1] state_ids = yield self.store.get_state_ids_for_event( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index f244e8f469..3582259026 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -703,7 +703,6 @@ class RoomMembershipRestServlet(TransactionRestServlet): txn_id, ) return 200, {} - return target = requester.user if membership_action in ["invite", "ban", "unban", "kick"]: diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 65f9fce2ff..107854c669 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -230,7 +230,6 @@ class RegisterRestServlet(RestServlet): if kind == b"guest": ret = yield self._do_guest_registration(body, address=client_addr) return ret - return elif kind != b"user": raise UnrecognizedRequestError( "Do not understand membership kind: %s" % (kind,) @@ -280,7 +279,6 @@ class RegisterRestServlet(RestServlet): desired_username, access_token, body ) return 200, result # we throw for non 200 responses - return # for regular registration, downcase the provided username before # attempting to register it. 
This should mean diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index bd40891a7f..7a56cd4b6c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -183,7 +183,6 @@ class PreviewUrlResource(DirectServeResource): if isinstance(og, six.text_type): og = og.encode("utf8") return og - return media_info = yield self._download_url(url, user) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index a0d34f16ea..2b0f4c79ee 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -136,7 +136,6 @@ class StateHandler(object): if event_id: event = yield self.store.get_event(event_id, allow_none=True) return event - return state_map = yield self.store.get_events( list(state.values()), get_prev_content=False diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 36657753cd..435b2acd4d 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -165,7 +165,6 @@ class ApplicationServiceTransactionWorkerStore( ) if result: return result.get("state") - return return None def set_appservice_state(self, service, state): diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index e966a73f3d..eed7757ed5 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -47,7 +47,6 @@ class DirectoryWorkerStore(SQLBaseStore): if not room_id: return None - return servers = yield self._simple_select_onecol( "room_alias_servers", @@ -58,7 +57,6 @@ class DirectoryWorkerStore(SQLBaseStore): if not servers: return None - return return RoomAliasMapping(room_id, room_alias.to_string(), servers) diff --git a/synapse/storage/profile.py b/synapse/storage/profile.py index 8a5d8e9b18..912c1df6be 100644 --- a/synapse/storage/profile.py +++ b/synapse/storage/profile.py @@ -35,7 +35,6 @@ class ProfileWorkerStore(SQLBaseStore): if e.code == 404: # no match return ProfileInfo(None, None) - return else: raise -- cgit 1.4.1 From 8401bcd206ed483a03d7bd7ea97cc401b86ed4b4 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 3 Sep 2019 12:44:14 +0100 Subject: fix typo --- synapse/handlers/presence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 8377a0ddc2..053cf66b28 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -255,7 +255,7 @@ class PresenceHandler(object): self.unpersisted_users_changes = set() if unpersisted: - logger.info("Persisting %d upersisted presence updates", len(unpersisted)) + logger.info("Persisting %d unpersisted presence updates", len(unpersisted)) yield self.store.update_presence( [self.user_to_current_state[user_id] for user_id in unpersisted] ) -- cgit 1.4.1 From 0eac7077c9e6049a86c6fb05ab127ffc84cf7315 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 3 Sep 2019 09:01:30 -0600 Subject: Ensure an auth instance is available to ListMediaInRoom (#5967) * Ensure an auth instance is available to ListMediaInRoom Fixes https://github.com/matrix-org/synapse/issues/5737 * Changelog --- changelog.d/5967.bugfix | 1 + synapse/rest/admin/media.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/5967.bugfix diff --git a/changelog.d/5967.bugfix b/changelog.d/5967.bugfix new file mode 100644 index 0000000000..8d7bf5c2e9 --- /dev/null +++ b/changelog.d/5967.bugfix @@ -0,0 +1 @@ +Fix list media admin API always returning an error. 
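The "double return" cleanup in #5962 above is purely mechanical. A hypothetical before/after sketch of the pattern being removed (illustrative names, not code from the patch):

```python
# Before: the regex rewrite of "defer.returnValue(x); return" left an
# unreachable bare `return` behind each new `return x`.
def get_state_before(result):
    if result:
        return result.get("state")
        return  # dead code: never reached
    return None


# After: the stray `return` is dropped; behaviour is unchanged, since the
# bare `return` could never execute anyway.
def get_state_after(result):
    if result:
        return result.get("state")
    return None
```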
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index f3f63f0be7..ed7086d09c 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -60,6 +60,7 @@ class ListMediaInRoom(RestServlet): def __init__(self, hs): self.store = hs.get_datastore() + self.auth = hs.get_auth() @defer.inlineCallbacks def on_GET(self, request, room_id): -- cgit 1.4.1 From 894c1a575979a52114fac09ab3814f4fe023659a Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Tue, 3 Sep 2019 16:36:01 +0100 Subject: Docker packaging should not su-exec or chmod if already running as UID/GID (#5970) Adjust su-exec to only be used if needed. If UID == getuid() and GID == getgid() then we do not need to su-exec, and chmod will not work. --- changelog.d/5970.docker | 1 + docker/start.py | 84 ++++++++++++++++++++++++++++--------------------- 2 files changed, 50 insertions(+), 35 deletions(-) create mode 100644 changelog.d/5970.docker diff --git a/changelog.d/5970.docker b/changelog.d/5970.docker new file mode 100644 index 0000000000..c9d04da9cd --- /dev/null +++ b/changelog.d/5970.docker @@ -0,0 +1 @@ +Avoid changing UID/GID if they are already correct. diff --git a/docker/start.py b/docker/start.py index 40a861f200..260f2d9943 100755 --- a/docker/start.py +++ b/docker/start.py @@ -41,8 +41,8 @@ def generate_config_from_template(config_dir, config_path, environ, ownership): config_dir (str): where to put generated config files config_path (str): where to put the main config file environ (dict): environment dictionary - ownership (str): ":" string which will be used to set - ownership of the generated configs + ownership (str|None): ":" string which will be used to set + ownership of the generated configs. If None, ownership will not change. """ for v in ("SYNAPSE_SERVER_NAME", "SYNAPSE_REPORT_STATS"): if v not in environ: @@ -105,24 +105,24 @@ def generate_config_from_template(config_dir, config_path, environ, ownership): log("Generating log config file " + log_config_file) convert("/conf/log.config", log_config_file, environ) - subprocess.check_output(["chown", "-R", ownership, "/data"]) - # Hopefully we already have a signing key, but generate one if not. - subprocess.check_output( - [ - "su-exec", - ownership, - "python", - "-m", - "synapse.app.homeserver", - "--config-path", - config_path, - # tell synapse to put generated keys in /data rather than /compiled - "--keys-directory", - config_dir, - "--generate-keys", - ] - ) + args = [ + "python", + "-m", + "synapse.app.homeserver", + "--config-path", + config_path, + # tell synapse to put generated keys in /data rather than /compiled + "--keys-directory", + config_dir, + "--generate-keys", + ] + + if ownership is not None: + subprocess.check_output(["chown", "-R", ownership, "/data"]) + args = ["su-exec", ownership] + args + + subprocess.check_output(args) def run_generate_config(environ, ownership): @@ -130,7 +130,7 @@ def run_generate_config(environ, ownership): Args: environ (dict): env var dict - ownership (str): "userid:groupid" arg for chmod + ownership (str|None): "userid:groupid" arg for chmod. If None, ownership will not change. Never returns. """ @@ -149,9 +149,6 @@ def run_generate_config(environ, ownership): log("Creating log config %s" % (log_config_file,)) convert("/conf/log.config", log_config_file, environ) - # make sure that synapse has perms to write to the data dir. 
-    subprocess.check_output(["chown", ownership, data_dir])
-
     args = [
         "python",
         "-m",
@@ -170,12 +167,33 @@ def run_generate_config(environ, ownership):
         "--open-private-ports",
     ]
     # log("running %s" % (args, ))
-    os.execv("/usr/local/bin/python", args)
+
+    if ownership is not None:
+        # make sure that synapse has perms to write to the data dir.
+        # (this must happen before os.execv, which replaces the process)
+        subprocess.check_output(["chown", ownership, data_dir])
+
+        args = ["su-exec", ownership] + args
+        os.execv("/sbin/su-exec", args)
+    else:
+        os.execv("/usr/local/bin/python", args)
 
 
 def main(args, environ):
     mode = args[1] if len(args) > 1 else None
-    ownership = "{}:{}".format(environ.get("UID", 991), environ.get("GID", 991))
+    desired_uid = int(environ.get("UID", "991"))
+    desired_gid = int(environ.get("GID", "991"))
+    if (desired_uid == os.getuid()) and (desired_gid == os.getgid()):
+        ownership = None
+    else:
+        ownership = "{}:{}".format(desired_uid, desired_gid)
+
+    log(
+        "Container running as UserID %s:%s, ENV (or defaults) requests %s:%s"
+        % (os.getuid(), os.getgid(), desired_uid, desired_gid)
+    )
+
+    if ownership is None:
+        log("Will not perform chmod/su-exec as UserID already matches request")
 
     # In generate mode, generate a configuration and missing keys, then exit
     if mode == "generate":
@@ -227,16 +245,12 @@ def main(args, environ):
 
     log("Starting synapse with config file " + config_path)
 
-    args = [
-        "su-exec",
-        ownership,
-        "python",
-        "-m",
-        "synapse.app.homeserver",
-        "--config-path",
-        config_path,
-    ]
-    os.execv("/sbin/su-exec", args)
+    args = ["python", "-m", "synapse.app.homeserver", "--config-path", config_path]
+    if ownership is not None:
+        args = ["su-exec", ownership] + args
+        os.execv("/sbin/su-exec", args)
+    else:
+        os.execv("/usr/local/bin/python", args)
 
 
 if __name__ == "__main__":
-- cgit 1.4.1


From a98b8583c647d841b49b94a3de030b9e8d5271c0 Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Tue, 3 Sep 2019 19:58:51 +0100
Subject: Remove unnecessary variable declaration

---
 synapse/handlers/register.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 3142d85788..1711d5ac5c 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -397,7 +397,6 @@ class RegistrationHandler(BaseHandler):
 
     @defer.inlineCallbacks
     def _join_user_to_room(self, requester, room_identifier):
-        room_id = None
         room_member_handler = self.hs.get_room_member_handler()
         if RoomID.is_valid(room_identifier):
             room_id = room_identifier
-- cgit 1.4.1


From 6b6086b8bf8d222e528002009d921804d306d85d Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Tue, 3 Sep 2019 20:00:09 +0100
Subject: Fix docstring

---
 synapse/handlers/register.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 1711d5ac5c..e59b2a3684 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -681,8 +681,7 @@ class RegistrationHandler(BaseHandler):
         Args:
             user_id (str): id of user
             threepid (object): m.login.msisdn auth response
-            token (str): access_token for the user
-            bind_email (bool): true if the client requested the email to be
+            bind_msisdn (bool): true if the client requested the msisdn to be
                 bound at the identity server
         Returns:
             defer.Deferred:
-- cgit 1.4.1


From 2f416fc9976acf482163bba82e13b7d749e1c290 Mon Sep 17 00:00:00 2001
From: Travis Ralston
Date: Tue, 3 Sep 2019 13:35:20 -0600
Subject: Ensure the list media admin API is always
available This API is required for some external media repo implementations to operate (mostly for doing quarantine operations on a room). * changelog --- changelog.d/5966.bugfix | 1 + synapse/rest/admin/__init__.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5966.bugfix diff --git a/changelog.d/5966.bugfix b/changelog.d/5966.bugfix new file mode 100644 index 0000000000..b8ef5a7819 --- /dev/null +++ b/changelog.d/5966.bugfix @@ -0,0 +1 @@ +Fix admin API for listing media in a room not being available with an external media repo. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index b4761adaed..81b6bd8816 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -41,7 +41,7 @@ from synapse.rest.admin._base import ( assert_user_is_admin, historical_admin_path_patterns, ) -from synapse.rest.admin.media import register_servlets_for_media_repo +from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.rest.admin.users import UserAdminServlet @@ -761,9 +761,12 @@ def register_servlets_for_client_rest_resource(hs, http_server): DeleteGroupAdminRestServlet(hs).register(http_server) AccountValidityRenewServlet(hs).register(http_server) - # Load the media repo ones if we're using them. + # Load the media repo ones if we're using them. Otherwise load the servlets which + # don't need a media repo (typically readonly admin APIs). if hs.config.can_load_media_repo: register_servlets_for_media_repo(hs, http_server) + else: + ListMediaInRoom(hs).register(http_server) # don't add more things here: new servlets should only be exposed on # /_synapse/admin so should not go here. Instead register them in AdminRestResource. -- cgit 1.4.1 From ea128a3e8e08700b047f3e90de559704fd793cda Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 3 Sep 2019 21:05:06 +0100 Subject: code cleanups --- synapse/handlers/room_member.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index f03a2bd540..093f2ea36e 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -962,9 +962,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): ) if complexity: - if complexity["v1"] > max_complexity: - return True - return False + return complexity["v1"] > max_complexity return None @defer.inlineCallbacks @@ -980,10 +978,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): max_complexity = self.hs.config.limit_remote_rooms.complexity complexity = yield self.store.get_room_complexity(room_id) - if complexity["v1"] > max_complexity: - return True - - return False + return complexity["v1"] > max_complexity @defer.inlineCallbacks def _remote_join(self, requester, remote_room_hosts, room_id, user, content): -- cgit 1.4.1 From 6e834e94fcc97811e4cc8185e86c6b9da06eb28e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 4 Sep 2019 13:04:27 +0100 Subject: Fix and refactor room and user stats (#5971) Previously the stats were not being correctly populated. 
--- changelog.d/5971.bugfix | 1 + docs/room_and_user_statistics.md | 62 ++ synapse/config/stats.py | 13 +- synapse/handlers/stats.py | 307 +++--- synapse/storage/events.py | 5 +- synapse/storage/registration.py | 12 + synapse/storage/roommember.py | 44 +- .../storage/schema/delta/56/stats_separated.sql | 152 +++ synapse/storage/stats.py | 1036 ++++++++++++++------ tests/handlers/test_stats.py | 643 +++++++++--- tests/rest/client/v1/utils.py | 8 +- 11 files changed, 1642 insertions(+), 641 deletions(-) create mode 100644 changelog.d/5971.bugfix create mode 100644 docs/room_and_user_statistics.md create mode 100644 synapse/storage/schema/delta/56/stats_separated.sql diff --git a/changelog.d/5971.bugfix b/changelog.d/5971.bugfix new file mode 100644 index 0000000000..9ea095103b --- /dev/null +++ b/changelog.d/5971.bugfix @@ -0,0 +1 @@ +Fix room and user stats tracking. diff --git a/docs/room_and_user_statistics.md b/docs/room_and_user_statistics.md new file mode 100644 index 0000000000..e1facb38d4 --- /dev/null +++ b/docs/room_and_user_statistics.md @@ -0,0 +1,62 @@ +Room and User Statistics +======================== + +Synapse maintains room and user statistics (as well as a cache of room state), +in various tables. These can be used for administrative purposes but are also +used when generating the public room directory. + + +# Synapse Developer Documentation + +## High-Level Concepts + +### Definitions + +* **subject**: Something we are tracking stats about – currently a room or user. +* **current row**: An entry for a subject in the appropriate current statistics + table. Each subject can have only one. +* **historical row**: An entry for a subject in the appropriate historical + statistics table. Each subject can have any number of these. + +### Overview + +Stats are maintained as time series. There are two kinds of column: + +* absolute columns – where the value is correct for the time given by `end_ts` + in the stats row. (Imagine a line graph for these values) + * They can also be thought of as 'gauges' in Prometheus, if you are familiar. +* per-slice columns – where the value corresponds to how many of the occurrences + occurred within the time slice given by `(end_ts − bucket_size)…end_ts` + or `start_ts…end_ts`. (Imagine a histogram for these values) + +Stats are maintained in two tables (for each type): current and historical. + +Current stats correspond to the present values. Each subject can only have one +entry. + +Historical stats correspond to values in the past. Subjects may have multiple +entries. + +## Concepts around the management of stats + +### Current rows + +Current rows contain the most up-to-date statistics for a room. +They only contain absolute columns + +### Historical rows + +Historical rows can always be considered to be valid for the time slice and +end time specified. + +* historical rows will not exist for every time slice – they will be omitted + if there were no changes. In this case, the following assumptions can be + made to interpolate/recreate missing rows: + - absolute fields have the same values as in the preceding row + - per-slice fields are zero (`0`) +* historical rows will not be retained forever – rows older than a configurable + time will be purged. + +#### Purge + +The purging of historical rows is not yet implemented. 
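The interpolation rules above are mechanical enough to sketch in code. The helper and field tuples below are hypothetical (illustration only, not part of this patch):

```python
# Recreate a missing historical row for the slice ending at end_ts:
# absolute (gauge-like) fields repeat the preceding stored row, while
# per-slice fields are zero because nothing happened in the omitted slice.
ABSOLUTE_FIELDS = ("current_state_events", "joined_members")  # illustrative subset
PER_SLICE_FIELDS = ("total_events", "total_event_bytes")


def interpolate_row(preceding_row, end_ts, bucket_size):
    row = {"end_ts": end_ts, "bucket_size": bucket_size}
    for field in ABSOLUTE_FIELDS:
        row[field] = preceding_row[field]  # absolute: value carries forward
    for field in PER_SLICE_FIELDS:
        row[field] = 0  # per-slice: no occurrences in this slice
    return row
```

For example, if a room's most recent stored row has `joined_members = 10`, every omitted slice after it reads back as 10 joined members and 0 new events, until the next stored row.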
diff --git a/synapse/config/stats.py b/synapse/config/stats.py index b518a3ed9c..b18ddbd1fa 100644 --- a/synapse/config/stats.py +++ b/synapse/config/stats.py @@ -27,19 +27,16 @@ class StatsConfig(Config): def read_config(self, config, **kwargs): self.stats_enabled = True - self.stats_bucket_size = 86400 + self.stats_bucket_size = 86400 * 1000 self.stats_retention = sys.maxsize stats_config = config.get("stats", None) if stats_config: self.stats_enabled = stats_config.get("enabled", self.stats_enabled) - self.stats_bucket_size = ( - self.parse_duration(stats_config.get("bucket_size", "1d")) / 1000 + self.stats_bucket_size = self.parse_duration( + stats_config.get("bucket_size", "1d") ) - self.stats_retention = ( - self.parse_duration( - stats_config.get("retention", "%ds" % (sys.maxsize,)) - ) - / 1000 + self.stats_retention = self.parse_duration( + stats_config.get("retention", "%ds" % (sys.maxsize,)) ) def generate_config_section(self, config_dir_path, server_name, **kwargs): diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 4449da6669..921735edb3 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -14,15 +14,14 @@ # limitations under the License. import logging +from collections import Counter from twisted.internet import defer -from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.api.constants import EventTypes, Membership from synapse.handlers.state_deltas import StateDeltasHandler from synapse.metrics import event_processing_positions from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import UserID -from synapse.util.metrics import Measure logger = logging.getLogger(__name__) @@ -62,11 +61,10 @@ class StatsHandler(StateDeltasHandler): def notify_new_event(self): """Called when there may be more deltas to process """ - if not self.hs.config.stats_enabled: + if not self.hs.config.stats_enabled or self._is_processing: return - if self._is_processing: - return + self._is_processing = True @defer.inlineCallbacks def process(): @@ -75,39 +73,72 @@ class StatsHandler(StateDeltasHandler): finally: self._is_processing = False - self._is_processing = True run_as_background_process("stats.notify_new_event", process) @defer.inlineCallbacks def _unsafe_process(self): # If self.pos is None then means we haven't fetched it from DB if self.pos is None: - self.pos = yield self.store.get_stats_stream_pos() - - # If still None then the initial background update hasn't happened yet - if self.pos is None: - return None + self.pos = yield self.store.get_stats_positions() # Loop round handling deltas until we're up to date + while True: - with Measure(self.clock, "stats_delta"): - deltas = yield self.store.get_current_state_deltas(self.pos) - if not deltas: - return + deltas = yield self.store.get_current_state_deltas(self.pos) + + if deltas: + logger.debug("Handling %d state deltas", len(deltas)) + room_deltas, user_deltas = yield self._handle_deltas(deltas) + + max_pos = deltas[-1]["stream_id"] + else: + room_deltas = {} + user_deltas = {} + max_pos = yield self.store.get_room_max_stream_ordering() - logger.info("Handling %d state deltas", len(deltas)) - yield self._handle_deltas(deltas) + # Then count deltas for total_events and total_event_bytes. 
+            room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes(
+                self.pos, max_pos
+            )
+
+            for room_id, fields in room_count.items():
+                room_deltas.setdefault(room_id, {}).update(fields)
+
+            for user_id, fields in user_count.items():
+                user_deltas.setdefault(user_id, {}).update(fields)
+
+            logger.debug("room_deltas: %s", room_deltas)
+            logger.debug("user_deltas: %s", user_deltas)
 
-                self.pos = deltas[-1]["stream_id"]
-                yield self.store.update_stats_stream_pos(self.pos)
+            # Always call this so that we update the stats position.
+            yield self.store.bulk_update_stats_delta(
+                self.clock.time_msec(),
+                updates={"room": room_deltas, "user": user_deltas},
+                stream_id=max_pos,
+            )
+
+            event_processing_positions.labels("stats").set(max_pos)
 
-                event_processing_positions.labels("stats").set(self.pos)
+            if self.pos == max_pos:
+                break
+
+            self.pos = max_pos
 
     @defer.inlineCallbacks
     def _handle_deltas(self, deltas):
+        """Called with the state deltas to process
+
+        Returns:
+            Deferred[tuple[dict[str, Counter], dict[str, Counter]]]
+                Resolves to two dicts, the room deltas and the user deltas,
+                mapping from room/user ID to changes in the various fields.
         """
-        Called with the state deltas to process
-        """
+
+        room_to_stats_deltas = {}
+        user_to_stats_deltas = {}
+
+        room_to_state_updates = {}
+
         for delta in deltas:
             typ = delta["type"]
             state_key = delta["state_key"]
@@ -115,11 +146,10 @@ class StatsHandler(StateDeltasHandler):
             event_id = delta["event_id"]
             stream_id = delta["stream_id"]
             prev_event_id = delta["prev_event_id"]
-            stream_pos = delta["stream_id"]
 
-            logger.debug("Handling: %r %r, %s", typ, state_key, event_id)
+            logger.debug("Handling: %r, %r %r, %s", room_id, typ, state_key, event_id)
 
-            token = yield self.store.get_earliest_token_for_room_stats(room_id)
+            token = yield self.store.get_earliest_token_for_stats("room", room_id)
 
             # If the earliest token to begin from is larger than our current
             # stream ID, skip processing this delta.
@@ -131,203 +161,130 @@ class StatsHandler(StateDeltasHandler):
                 continue
 
             if event_id is None and prev_event_id is None:
-                # Errr...
+                logger.error(
+                    "event ID is None and so is the previous event ID. stream_id: %s",
+                    stream_id,
+                )
                 continue
 
             event_content = {}
 
+            sender = None
             if event_id is not None:
                 event = yield self.store.get_event(event_id, allow_none=True)
                 if event:
                     event_content = event.content or {}
+                    sender = event.sender
+
+            # All the values in this dict are deltas (RELATIVE changes)
+            room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter())
 
-            # We use stream_pos here rather than fetch by event_id as event_id
-            # may be None
-            now = yield self.store.get_received_ts_by_stream_pos(stream_pos)
+            room_state = room_to_state_updates.setdefault(room_id, {})
 
-            # quantise time to the nearest bucket
-            now = (now // 1000 // self.stats_bucket_size) * self.stats_bucket_size
+            if prev_event_id is None:
+                # this state event doesn't overwrite another,
+                # so it is a new effective/current state event
+                room_stats_delta["current_state_events"] += 1
 
             if typ == EventTypes.Member:
                 # we could use _get_key_change here but it's a bit inefficient
                 # given we're not testing for a specific result; might as well
                 # just grab the prev_membership and membership strings and
                 # compare them.
-                prev_event_content = {}
+                # We take None rather than leave as a previous membership
+                # in the absence of a previous event because we do not want to
+                # reduce the leave count when a new-to-the-room user joins.
+ prev_membership = None if prev_event_id is not None: prev_event = yield self.store.get_event( prev_event_id, allow_none=True ) if prev_event: prev_event_content = prev_event.content + prev_membership = prev_event_content.get( + "membership", Membership.LEAVE + ) membership = event_content.get("membership", Membership.LEAVE) - prev_membership = prev_event_content.get("membership", Membership.LEAVE) - - if prev_membership == membership: - continue - if prev_membership == Membership.JOIN: - yield self.store.update_stats_delta( - now, "room", room_id, "joined_members", -1 - ) + if prev_membership is None: + logger.debug("No previous membership for this user.") + elif membership == prev_membership: + pass # noop + elif prev_membership == Membership.JOIN: + room_stats_delta["joined_members"] -= 1 elif prev_membership == Membership.INVITE: - yield self.store.update_stats_delta( - now, "room", room_id, "invited_members", -1 - ) + room_stats_delta["invited_members"] -= 1 elif prev_membership == Membership.LEAVE: - yield self.store.update_stats_delta( - now, "room", room_id, "left_members", -1 - ) + room_stats_delta["left_members"] -= 1 elif prev_membership == Membership.BAN: - yield self.store.update_stats_delta( - now, "room", room_id, "banned_members", -1 - ) + room_stats_delta["banned_members"] -= 1 else: - err = "%s is not a valid prev_membership" % (repr(prev_membership),) - logger.error(err) - raise ValueError(err) + raise ValueError( + "%r is not a valid prev_membership" % (prev_membership,) + ) + if membership == prev_membership: + pass # noop if membership == Membership.JOIN: - yield self.store.update_stats_delta( - now, "room", room_id, "joined_members", +1 - ) + room_stats_delta["joined_members"] += 1 elif membership == Membership.INVITE: - yield self.store.update_stats_delta( - now, "room", room_id, "invited_members", +1 - ) + room_stats_delta["invited_members"] += 1 + + if sender and self.is_mine_id(sender): + user_to_stats_deltas.setdefault(sender, Counter())[ + "invites_sent" + ] += 1 + elif membership == Membership.LEAVE: - yield self.store.update_stats_delta( - now, "room", room_id, "left_members", +1 - ) + room_stats_delta["left_members"] += 1 elif membership == Membership.BAN: - yield self.store.update_stats_delta( - now, "room", room_id, "banned_members", +1 - ) + room_stats_delta["banned_members"] += 1 else: - err = "%s is not a valid membership" % (repr(membership),) - logger.error(err) - raise ValueError(err) + raise ValueError("%r is not a valid membership" % (membership,)) user_id = state_key if self.is_mine_id(user_id): - # update user_stats as it's one of our users - public = yield self._is_public_room(room_id) - - if membership == Membership.LEAVE: - yield self.store.update_stats_delta( - now, - "user", - user_id, - "public_rooms" if public else "private_rooms", - -1, - ) - elif membership == Membership.JOIN: - yield self.store.update_stats_delta( - now, - "user", - user_id, - "public_rooms" if public else "private_rooms", - +1, - ) + # this accounts for transitions like leave → ban and so on. + has_changed_joinedness = (prev_membership == Membership.JOIN) != ( + membership == Membership.JOIN + ) - elif typ == EventTypes.Create: - # Newly created room. Add it with all blank portions. 
- yield self.store.update_room_state( - room_id, - { - "join_rules": None, - "history_visibility": None, - "encryption": None, - "name": None, - "topic": None, - "avatar": None, - "canonical_alias": None, - }, - ) + if has_changed_joinedness: + delta = +1 if membership == Membership.JOIN else -1 - elif typ == EventTypes.JoinRules: - yield self.store.update_room_state( - room_id, {"join_rules": event_content.get("join_rule")} - ) + user_to_stats_deltas.setdefault(user_id, Counter())[ + "joined_rooms" + ] += delta - is_public = yield self._get_key_change( - prev_event_id, event_id, "join_rule", JoinRules.PUBLIC - ) - if is_public is not None: - yield self.update_public_room_stats(now, room_id, is_public) + room_stats_delta["local_users_in_room"] += delta + elif typ == EventTypes.Create: + room_state["is_federatable"] = event_content.get("m.federate", True) + if sender and self.is_mine_id(sender): + user_to_stats_deltas.setdefault(sender, Counter())[ + "rooms_created" + ] += 1 + elif typ == EventTypes.JoinRules: + room_state["join_rules"] = event_content.get("join_rule") elif typ == EventTypes.RoomHistoryVisibility: - yield self.store.update_room_state( - room_id, - {"history_visibility": event_content.get("history_visibility")}, - ) - - is_public = yield self._get_key_change( - prev_event_id, event_id, "history_visibility", "world_readable" + room_state["history_visibility"] = event_content.get( + "history_visibility" ) - if is_public is not None: - yield self.update_public_room_stats(now, room_id, is_public) - elif typ == EventTypes.Encryption: - yield self.store.update_room_state( - room_id, {"encryption": event_content.get("algorithm")} - ) + room_state["encryption"] = event_content.get("algorithm") elif typ == EventTypes.Name: - yield self.store.update_room_state( - room_id, {"name": event_content.get("name")} - ) + room_state["name"] = event_content.get("name") elif typ == EventTypes.Topic: - yield self.store.update_room_state( - room_id, {"topic": event_content.get("topic")} - ) + room_state["topic"] = event_content.get("topic") elif typ == EventTypes.RoomAvatar: - yield self.store.update_room_state( - room_id, {"avatar": event_content.get("url")} - ) + room_state["avatar"] = event_content.get("url") elif typ == EventTypes.CanonicalAlias: - yield self.store.update_room_state( - room_id, {"canonical_alias": event_content.get("alias")} - ) + room_state["canonical_alias"] = event_content.get("alias") + elif typ == EventTypes.GuestAccess: + room_state["guest_access"] = event_content.get("guest_access") - @defer.inlineCallbacks - def update_public_room_stats(self, ts, room_id, is_public): - """ - Increment/decrement a user's number of public rooms when a room they are - in changes to/from public visibility. 
+ for room_id, state in room_to_state_updates.items(): + yield self.store.update_room_state(room_id, state) - Args: - ts (int): Timestamp in seconds - room_id (str) - is_public (bool) - """ - # For now, blindly iterate over all local users in the room so that - # we can handle the whole problem of copying buckets over as needed - user_ids = yield self.store.get_users_in_room(room_id) - - for user_id in user_ids: - if self.hs.is_mine(UserID.from_string(user_id)): - yield self.store.update_stats_delta( - ts, "user", user_id, "public_rooms", +1 if is_public else -1 - ) - yield self.store.update_stats_delta( - ts, "user", user_id, "private_rooms", -1 if is_public else +1 - ) - - @defer.inlineCallbacks - def _is_public_room(self, room_id): - join_rules = yield self.state.get_current_state(room_id, EventTypes.JoinRules) - history_visibility = yield self.state.get_current_state( - room_id, EventTypes.RoomHistoryVisibility - ) - - if (join_rules and join_rules.content.get("join_rule") == JoinRules.PUBLIC) or ( - ( - history_visibility - and history_visibility.content.get("history_visibility") - == "world_readable" - ) - ): - return True - else: - return False + return room_to_stats_deltas, user_to_stats_deltas diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 32050868ff..1958afe1d7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2270,8 +2270,9 @@ class EventsStore( "room_aliases", "room_depth", "room_memberships", - "room_state", - "room_stats", + "room_stats_state", + "room_stats_current", + "room_stats_historical", "room_stats_earliest_token", "rooms", "stream_ordering_to_exterm", diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 3f50324253..2d3c7e2dc9 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -869,6 +869,17 @@ class RegistrationStore( (user_id_obj.localpart, create_profile_with_displayname), ) + if self.hs.config.stats_enabled: + # we create a new completed user statistics row + + # we don't strictly need current_token since this user really can't + # have any state deltas before now (as it is a new user), but still, + # we include it for completeness. + current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn) + self._update_stats_delta_txn( + txn, now, "user", user_id, {}, complete_with_stream_id=current_token + ) + self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) @@ -1140,6 +1151,7 @@ class RegistrationStore( deferred str|None: A str representing a link to redirect the user to if there is one. """ + # Insert everything into a transaction in order to run atomically def validate_threepid_session_txn(txn): row = self._simple_select_one_txn( diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index eecb276465..f8b682ebd9 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -112,29 +112,31 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(max_entries=100000, iterable=True) def get_users_in_room(self, room_id): - def f(txn): - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - sql = """ - SELECT state_key FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? AND membership = ? 
- """ - else: - sql = """ - SELECT state_key FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? - """ + return self.runInteraction( + "get_users_in_room", self.get_users_in_room_txn, room_id + ) - txn.execute(sql, (room_id, Membership.JOIN)) - return [to_ascii(r[0]) for r in txn] + def get_users_in_room_txn(self, txn, room_id): + # If we can assume current_state_events.membership is up to date + # then we can avoid a join, which is a Very Good Thing given how + # frequently this function gets called. + if self._current_state_events_membership_up_to_date: + sql = """ + SELECT state_key FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? AND membership = ? + """ + else: + sql = """ + SELECT state_key FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? + """ - return self.runInteraction("get_users_in_room", f) + txn.execute(sql, (room_id, Membership.JOIN)) + return [to_ascii(r[0]) for r in txn] @cached(max_entries=100000) def get_room_summary(self, room_id): diff --git a/synapse/storage/schema/delta/56/stats_separated.sql b/synapse/storage/schema/delta/56/stats_separated.sql new file mode 100644 index 0000000000..163529c071 --- /dev/null +++ b/synapse/storage/schema/delta/56/stats_separated.sql @@ -0,0 +1,152 @@ +/* Copyright 2018 New Vector Ltd + * Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +----- First clean up from previous versions of room stats. + +-- First remove old stats stuff +DROP TABLE IF EXISTS room_stats; +DROP TABLE IF EXISTS room_state; +DROP TABLE IF EXISTS room_stats_state; +DROP TABLE IF EXISTS user_stats; +DROP TABLE IF EXISTS room_stats_earliest_tokens; +DROP TABLE IF EXISTS _temp_populate_stats_position; +DROP TABLE IF EXISTS _temp_populate_stats_rooms; +DROP TABLE IF EXISTS stats_stream_pos; + +-- Unschedule old background updates if they're still scheduled +DELETE FROM background_updates WHERE update_name IN ( + 'populate_stats_createtables', + 'populate_stats_process_rooms', + 'populate_stats_process_users', + 'populate_stats_cleanup' +); + +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_rooms', '{}', ''); + +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_users', '{}', 'populate_stats_process_rooms'); + +----- Create tables for our version of room stats. + +-- single-row table to track position of incremental updates +DROP TABLE IF EXISTS stats_incremental_position; +CREATE TABLE stats_incremental_position ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. 
+ stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); + +-- insert a null row and make sure it is the only one. +INSERT INTO stats_incremental_position ( + stream_id +) SELECT COALESCE(MAX(stream_ordering), 0) from events; + +-- represents PRESENT room statistics for a room +-- only holds absolute fields +DROP TABLE IF EXISTS room_stats_current; +CREATE TABLE room_stats_current ( + room_id TEXT NOT NULL PRIMARY KEY, + + -- These are absolute counts + current_state_events INT NOT NULL, + joined_members INT NOT NULL, + invited_members INT NOT NULL, + left_members INT NOT NULL, + banned_members INT NOT NULL, + + local_users_in_room INT NOT NULL, + + -- The maximum delta stream position that this row takes into account. + completed_delta_stream_id BIGINT NOT NULL +); + + +-- represents HISTORICAL room statistics for a room +DROP TABLE IF EXISTS room_stats_historical; +CREATE TABLE room_stats_historical ( + room_id TEXT NOT NULL, + -- These stats cover the time from (end_ts - bucket_size)...end_ts (in ms). + -- Note that end_ts is quantised. + end_ts BIGINT NOT NULL, + bucket_size BIGINT NOT NULL, + + -- These stats are absolute counts + current_state_events BIGINT NOT NULL, + joined_members BIGINT NOT NULL, + invited_members BIGINT NOT NULL, + left_members BIGINT NOT NULL, + banned_members BIGINT NOT NULL, + local_users_in_room BIGINT NOT NULL, + + -- These stats are per time slice + total_events BIGINT NOT NULL, + total_event_bytes BIGINT NOT NULL, + + PRIMARY KEY (room_id, end_ts) +); + +-- We use this index to speed up deletion of ancient room stats. +CREATE INDEX room_stats_historical_end_ts ON room_stats_historical (end_ts); + +-- represents PRESENT statistics for a user +-- only holds absolute fields +DROP TABLE IF EXISTS user_stats_current; +CREATE TABLE user_stats_current ( + user_id TEXT NOT NULL PRIMARY KEY, + + joined_rooms BIGINT NOT NULL, + + -- The maximum delta stream position that this row takes into account. + completed_delta_stream_id BIGINT NOT NULL +); + +-- represents HISTORICAL statistics for a user +DROP TABLE IF EXISTS user_stats_historical; +CREATE TABLE user_stats_historical ( + user_id TEXT NOT NULL, + end_ts BIGINT NOT NULL, + bucket_size BIGINT NOT NULL, + + joined_rooms BIGINT NOT NULL, + + invites_sent BIGINT NOT NULL, + rooms_created BIGINT NOT NULL, + total_events BIGINT NOT NULL, + total_event_bytes BIGINT NOT NULL, + + PRIMARY KEY (user_id, end_ts) +); + +-- We use this index to speed up deletion of ancient user stats. +CREATE INDEX user_stats_historical_end_ts ON user_stats_historical (end_ts); + + +CREATE TABLE room_stats_state ( + room_id TEXT NOT NULL, + name TEXT, + canonical_alias TEXT, + join_rules TEXT, + history_visibility TEXT, + encryption TEXT, + avatar TEXT, + guest_access TEXT, + is_federatable BOOLEAN, + topic TEXT +); + +CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state(room_id); diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index e13efed417..6560173c08 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2018, 2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,17 +15,22 @@ # limitations under the License. 
import logging +from itertools import chain from twisted.internet import defer +from twisted.internet.defer import DeferredLock from synapse.api.constants import EventTypes, Membership -from synapse.storage.prepare_database import get_statements +from synapse.storage import PostgresEngine from synapse.storage.state_deltas import StateDeltasStore from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) # these fields track absolutes (e.g. total number of rooms on the server) +# You can think of these as Prometheus Gauges. +# You can draw these stats on a line graph. +# Example: number of users in a room ABSOLUTE_STATS_FIELDS = { "room": ( "current_state_events", @@ -32,14 +38,23 @@ ABSOLUTE_STATS_FIELDS = { "invited_members", "left_members", "banned_members", - "state_events", + "local_users_in_room", ), - "user": ("public_rooms", "private_rooms"), + "user": ("joined_rooms",), } -TYPE_TO_ROOM = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")} +# these fields are per-timeslice and so should be reset to 0 upon a new slice +# You can draw these stats on a histogram. +# Example: number of events sent locally during a time slice +PER_SLICE_FIELDS = { + "room": ("total_events", "total_event_bytes"), + "user": ("invites_sent", "rooms_created", "total_events", "total_event_bytes"), +} + +TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")} -TEMP_TABLE = "_temp_populate_stats" +# these are the tables (& ID columns) which contain our actual subjects +TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")} class StatsStore(StateDeltasStore): @@ -51,136 +66,102 @@ class StatsStore(StateDeltasStore): self.stats_enabled = hs.config.stats_enabled self.stats_bucket_size = hs.config.stats_bucket_size - self.register_background_update_handler( - "populate_stats_createtables", self._populate_stats_createtables - ) + self.stats_delta_processing_lock = DeferredLock() + self.register_background_update_handler( "populate_stats_process_rooms", self._populate_stats_process_rooms ) self.register_background_update_handler( - "populate_stats_cleanup", self._populate_stats_cleanup + "populate_stats_process_users", self._populate_stats_process_users ) + # we no longer need to perform clean-up, but we will give ourselves + # the potential to reintroduce it in the future – so documentation + # will still encourage the use of this no-op handler. + self.register_noop_background_update("populate_stats_cleanup") + self.register_noop_background_update("populate_stats_prepare") - @defer.inlineCallbacks - def _populate_stats_createtables(self, progress, batch_size): - - if not self.stats_enabled: - yield self._end_background_update("populate_stats_createtables") - return 1 - - # Get all the rooms that we want to process. - def _make_staging_area(txn): - # Create the temporary tables - stmts = get_statements( - """ - -- We just recreate the table, we'll be reinserting the - -- correct entries again later anyway. 
-                    DROP TABLE IF EXISTS {temp}_rooms;
-
-                    CREATE TABLE IF NOT EXISTS {temp}_rooms(
-                        room_id TEXT NOT NULL,
-                        events BIGINT NOT NULL
-                    );
-
-                    CREATE INDEX {temp}_rooms_events
-                        ON {temp}_rooms(events);
-                    CREATE INDEX {temp}_rooms_id
-                        ON {temp}_rooms(room_id);
-                """.format(
-                    temp=TEMP_TABLE
-                ).splitlines()
-            )
-
-            for statement in stmts:
-                txn.execute(statement)
-
-            sql = (
-                "CREATE TABLE IF NOT EXISTS "
-                + TEMP_TABLE
-                + "_position(position TEXT NOT NULL)"
-            )
-            txn.execute(sql)
-
-            # Get rooms we want to process from the database, only adding
-            # those that we haven't (i.e. those not in room_stats_earliest_token)
-            sql = """
-                INSERT INTO %s_rooms (room_id, events)
-                SELECT c.room_id, count(*) FROM current_state_events AS c
-                LEFT JOIN room_stats_earliest_token AS t USING (room_id)
-                WHERE t.room_id IS NULL
-                GROUP BY c.room_id
-            """ % (
-                TEMP_TABLE,
-            )
-            txn.execute(sql)
+    def quantise_stats_time(self, ts):
+        """
+        Quantises a timestamp to be a multiple of the bucket size.
 
-        new_pos = yield self.get_max_stream_id_in_current_state_deltas()
-        yield self.runInteraction("populate_stats_temp_build", _make_staging_area)
-        yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos})
-        self.get_earliest_token_for_room_stats.invalidate_all()
+        Args:
+            ts (int): the timestamp to quantise, in milliseconds since the Unix
+                Epoch
 
-        yield self._end_background_update("populate_stats_createtables")
-        return 1
+        Returns:
+            int: a timestamp which
+              - is divisible by the bucket size;
+              - is no later than `ts`; and
+              - is the largest such timestamp.
+        """
+        return (ts // self.stats_bucket_size) * self.stats_bucket_size
 
     @defer.inlineCallbacks
-    def _populate_stats_cleanup(self, progress, batch_size):
+    def _populate_stats_process_users(self, progress, batch_size):
         """
-        Update the user directory stream position, then clean up the old tables.
+        This is a background update which regenerates statistics for users.
         """
         if not self.stats_enabled:
-            yield self._end_background_update("populate_stats_cleanup")
+            yield self._end_background_update("populate_stats_process_users")
             return 1
 
-        position = yield self._simple_select_one_onecol(
-            TEMP_TABLE + "_position", None, "position"
+        last_user_id = progress.get("last_user_id", "")
+
+        def _get_next_batch(txn):
+            sql = """
+                    SELECT DISTINCT name FROM users
+                    WHERE name > ?
+                    ORDER BY name ASC
+                    LIMIT ?
+                """
+            txn.execute(sql, (last_user_id, batch_size))
+            return [r for r, in txn]
+
+        users_to_work_on = yield self.runInteraction(
+            "_populate_stats_process_users", _get_next_batch
         )
-        yield self.update_stats_stream_pos(position)
 
-        def _delete_staging_area(txn):
-            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms")
-            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position")
+        # No more users -- complete the transaction.
+ if not users_to_work_on: + yield self._end_background_update("populate_stats_process_users") + return 1 - yield self.runInteraction("populate_stats_cleanup", _delete_staging_area) + for user_id in users_to_work_on: + yield self._calculate_and_set_initial_state_for_user(user_id) + progress["last_user_id"] = user_id - yield self._end_background_update("populate_stats_cleanup") - return 1 + yield self.runInteraction( + "populate_stats_process_users", + self._background_update_progress_txn, + "populate_stats_process_users", + progress, + ) + + return len(users_to_work_on) @defer.inlineCallbacks def _populate_stats_process_rooms(self, progress, batch_size): - + """ + This is a background update which regenerates statistics for rooms. + """ if not self.stats_enabled: yield self._end_background_update("populate_stats_process_rooms") return 1 - # If we don't have progress filed, delete everything. - if not progress: - yield self.delete_all_stats() + last_room_id = progress.get("last_room_id", "") def _get_next_batch(txn): - # Only fetch 250 rooms, so we don't fetch too many at once, even - # if those 250 rooms have less than batch_size state events. sql = """ - SELECT room_id, events FROM %s_rooms - ORDER BY events DESC - LIMIT 250 - """ % ( - TEMP_TABLE, - ) - txn.execute(sql) - rooms_to_work_on = txn.fetchall() - - if not rooms_to_work_on: - return None - - # Get how many are left to process, so we can give status on how - # far we are in processing - txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") - progress["remaining"] = txn.fetchone()[0] - - return rooms_to_work_on + SELECT DISTINCT room_id FROM current_state_events + WHERE room_id > ? + ORDER BY room_id ASC + LIMIT ? + """ + txn.execute(sql, (last_room_id, batch_size)) + return [r for r, in txn] rooms_to_work_on = yield self.runInteraction( - "populate_stats_temp_read", _get_next_batch + "populate_stats_rooms_get_batch", _get_next_batch ) # No more rooms -- complete the transaction. 
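Both populate handlers above follow the same resumable batch pattern. A condensed, hypothetical standalone view (the store helpers here are assumed names, not real Synapse APIs):

```python
# One pass of a resumable background update: fetch the next batch of IDs
# strictly greater than the saved high-water mark, regenerate stats for
# each, then persist the new mark so a restart resumes where it left off.
def process_rooms_once(store, progress, batch_size):
    last_room_id = progress.get("last_room_id", "")
    batch = store.fetch_room_ids_after(last_room_id, batch_size)  # assumed helper
    if not batch:
        return 0  # nothing left: the background update can be ended
    for room_id in batch:
        store.recompute_room_stats(room_id)  # assumed helper
        progress["last_room_id"] = room_id
    store.save_progress("populate_stats_process_rooms", progress)  # assumed helper
    return len(batch)  # non-zero: the updater schedules another pass
```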
@@ -188,154 +169,28 @@ class StatsStore(StateDeltasStore): yield self._end_background_update("populate_stats_process_rooms") return 1 - logger.info( - "Processing the next %d rooms of %d remaining", - len(rooms_to_work_on), - progress["remaining"], - ) - - # Number of state events we've processed by going through each room - processed_event_count = 0 - - for room_id, event_count in rooms_to_work_on: - - current_state_ids = yield self.get_current_state_ids(room_id) - - join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) - history_visibility_id = current_state_ids.get( - (EventTypes.RoomHistoryVisibility, "") - ) - encryption_id = current_state_ids.get((EventTypes.RoomEncryption, "")) - name_id = current_state_ids.get((EventTypes.Name, "")) - topic_id = current_state_ids.get((EventTypes.Topic, "")) - avatar_id = current_state_ids.get((EventTypes.RoomAvatar, "")) - canonical_alias_id = current_state_ids.get((EventTypes.CanonicalAlias, "")) - - event_ids = [ - join_rules_id, - history_visibility_id, - encryption_id, - name_id, - topic_id, - avatar_id, - canonical_alias_id, - ] - - state_events = yield self.get_events( - [ev for ev in event_ids if ev is not None] - ) - - def _get_or_none(event_id, arg): - event = state_events.get(event_id) - if event: - return event.content.get(arg) - return None - - yield self.update_room_state( - room_id, - { - "join_rules": _get_or_none(join_rules_id, "join_rule"), - "history_visibility": _get_or_none( - history_visibility_id, "history_visibility" - ), - "encryption": _get_or_none(encryption_id, "algorithm"), - "name": _get_or_none(name_id, "name"), - "topic": _get_or_none(topic_id, "topic"), - "avatar": _get_or_none(avatar_id, "url"), - "canonical_alias": _get_or_none(canonical_alias_id, "alias"), - }, - ) + for room_id in rooms_to_work_on: + yield self._calculate_and_set_initial_state_for_room(room_id) + progress["last_room_id"] = room_id - now = self.hs.get_reactor().seconds() - - # quantise time to the nearest bucket - now = (now // self.stats_bucket_size) * self.stats_bucket_size - - def _fetch_data(txn): - - # Get the current token of the room - current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn) - - current_state_events = len(current_state_ids) - - membership_counts = self._get_user_counts_in_room_txn(txn, room_id) - - total_state_events = self._get_total_state_event_counts_txn( - txn, room_id - ) - - self._update_stats_txn( - txn, - "room", - room_id, - now, - { - "bucket_size": self.stats_bucket_size, - "current_state_events": current_state_events, - "joined_members": membership_counts.get(Membership.JOIN, 0), - "invited_members": membership_counts.get(Membership.INVITE, 0), - "left_members": membership_counts.get(Membership.LEAVE, 0), - "banned_members": membership_counts.get(Membership.BAN, 0), - "state_events": total_state_events, - }, - ) - self._simple_insert_txn( - txn, - "room_stats_earliest_token", - {"room_id": room_id, "token": current_token}, - ) - - # We've finished a room. Delete it from the table. - self._simple_delete_one_txn( - txn, TEMP_TABLE + "_rooms", {"room_id": room_id} - ) - - yield self.runInteraction("update_room_stats", _fetch_data) - - # Update the remaining counter. - progress["remaining"] -= 1 - yield self.runInteraction( - "populate_stats", - self._background_update_progress_txn, - "populate_stats_process_rooms", - progress, - ) - - processed_event_count += event_count - - if processed_event_count > batch_size: - # Don't process any more rooms, we've hit our batch size. 
- return processed_event_count + yield self.runInteraction( + "_populate_stats_process_rooms", + self._background_update_progress_txn, + "populate_stats_process_rooms", + progress, + ) - return processed_event_count + return len(rooms_to_work_on) - def delete_all_stats(self): + def get_stats_positions(self): """ - Delete all statistics records. + Returns the stats processor positions. """ - - def _delete_all_stats_txn(txn): - txn.execute("DELETE FROM room_state") - txn.execute("DELETE FROM room_stats") - txn.execute("DELETE FROM room_stats_earliest_token") - txn.execute("DELETE FROM user_stats") - - return self.runInteraction("delete_all_stats", _delete_all_stats_txn) - - def get_stats_stream_pos(self): return self._simple_select_one_onecol( - table="stats_stream_pos", + table="stats_incremental_position", keyvalues={}, retcol="stream_id", - desc="stats_stream_pos", - ) - - def update_stats_stream_pos(self, stream_id): - return self._simple_update_one( - table="stats_stream_pos", - keyvalues={}, - updatevalues={"stream_id": stream_id}, - desc="update_stats_stream_pos", + desc="stats_incremental_position", ) def update_room_state(self, room_id, fields): @@ -361,42 +216,87 @@ class StatsStore(StateDeltasStore): fields[col] = None return self._simple_upsert( - table="room_state", + table="room_stats_state", keyvalues={"room_id": room_id}, values=fields, desc="update_room_state", ) - def get_deltas_for_room(self, room_id, start, size=100): + def get_statistics_for_subject(self, stats_type, stats_id, start, size=100): """ - Get statistics deltas for a given room. + Get statistics for a given subject. Args: - room_id (str) + stats_type (str): The type of subject + stats_id (str): The ID of the subject (e.g. room_id or user_id) start (int): Pagination start. Number of entries, not timestamp. size (int): How many entries to return. Returns: Deferred[list[dict]], where the dict has the keys of - ABSOLUTE_STATS_FIELDS["room"] and "ts". + ABSOLUTE_STATS_FIELDS[stats_type], and "bucket_size" and "end_ts". """ - return self._simple_select_list_paginate( - "room_stats", - {"room_id": room_id}, - "ts", + return self.runInteraction( + "get_statistics_for_subject", + self._get_statistics_for_subject_txn, + stats_type, + stats_id, + start, + size, + ) + + def _get_statistics_for_subject_txn( + self, txn, stats_type, stats_id, start, size=100 + ): + """ + Transaction-bound version of L{get_statistics_for_subject}. + """ + + table, id_col = TYPE_TO_TABLE[stats_type] + selected_columns = list( + ABSOLUTE_STATS_FIELDS[stats_type] + PER_SLICE_FIELDS[stats_type] + ) + + slice_list = self._simple_select_list_paginate_txn( + txn, + table + "_historical", + {id_col: stats_id}, + "end_ts", start, size, - retcols=(list(ABSOLUTE_STATS_FIELDS["room"]) + ["ts"]), + retcols=selected_columns + ["bucket_size", "end_ts"], order_direction="DESC", ) - def get_all_room_state(self): - return self._simple_select_list( - "room_state", None, retcols=("name", "topic", "canonical_alias") + return slice_list + + def get_room_stats_state(self, room_id): + """ + Returns the current room_stats_state for a room. + + Args: + room_id (str): The ID of the room to return state for. 
+ + Returns (dict): + Dictionary containing these keys: + "name", "topic", "canonical_alias", "avatar", "join_rules", + "history_visibility" + """ + return self._simple_select_one( + "room_stats_state", + {"room_id": room_id}, + retcols=( + "name", + "topic", + "canonical_alias", + "avatar", + "join_rules", + "history_visibility", + ), ) @cached() - def get_earliest_token_for_room_stats(self, room_id): + def get_earliest_token_for_stats(self, stats_type, id): """ Fetch the "earliest token". This is used by the room stats delta processor to ignore deltas that have been processed between the @@ -406,79 +306,571 @@ class StatsStore(StateDeltasStore): Returns: Deferred[int] """ + table, id_col = TYPE_TO_TABLE[stats_type] + return self._simple_select_one_onecol( - "room_stats_earliest_token", - {"room_id": room_id}, - retcol="token", + "%s_current" % (table,), + keyvalues={id_col: id}, + retcol="completed_delta_stream_id", allow_none=True, ) - def update_stats(self, stats_type, stats_id, ts, fields): - table, id_col = TYPE_TO_ROOM[stats_type] - return self._simple_upsert( - table=table, - keyvalues={id_col: stats_id, "ts": ts}, - values=fields, - desc="update_stats", + def bulk_update_stats_delta(self, ts, updates, stream_id): + """Bulk update stats tables for a given stream_id and updates the stats + incremental position. + + Args: + ts (int): Current timestamp in ms + updates(dict[str, dict[str, dict[str, Counter]]]): The updates to + commit as a mapping stats_type -> stats_id -> field -> delta. + stream_id (int): Current position. + + Returns: + Deferred + """ + + def _bulk_update_stats_delta_txn(txn): + for stats_type, stats_updates in updates.items(): + for stats_id, fields in stats_updates.items(): + self._update_stats_delta_txn( + txn, + ts=ts, + stats_type=stats_type, + stats_id=stats_id, + fields=fields, + complete_with_stream_id=stream_id, + ) + + self._simple_update_one_txn( + txn, + table="stats_incremental_position", + keyvalues={}, + updatevalues={"stream_id": stream_id}, + ) + + return self.runInteraction( + "bulk_update_stats_delta", _bulk_update_stats_delta_txn ) - def _update_stats_txn(self, txn, stats_type, stats_id, ts, fields): - table, id_col = TYPE_TO_ROOM[stats_type] - return self._simple_upsert_txn( - txn, table=table, keyvalues={id_col: stats_id, "ts": ts}, values=fields + def update_stats_delta( + self, + ts, + stats_type, + stats_id, + fields, + complete_with_stream_id, + absolute_field_overrides=None, + ): + """ + Updates the statistics for a subject, with a delta (difference/relative + change). + + Args: + ts (int): timestamp of the change + stats_type (str): "room" or "user" – the kind of subject + stats_id (str): the subject's ID (room ID or user ID) + fields (dict[str, int]): Deltas of stats values. + complete_with_stream_id (int, optional): + If supplied, converts an incomplete row into a complete row, + with the supplied stream_id marked as the stream_id where the + row was completed. + absolute_field_overrides (dict[str, int]): Current stats values + (i.e. not deltas) of absolute fields. + Does not work with per-slice fields. + """ + + return self.runInteraction( + "update_stats_delta", + self._update_stats_delta_txn, + ts, + stats_type, + stats_id, + fields, + complete_with_stream_id=complete_with_stream_id, + absolute_field_overrides=absolute_field_overrides, ) - def update_stats_delta(self, ts, stats_type, stats_id, field, value): - def _update_stats_delta(txn): - table, id_col = TYPE_TO_ROOM[stats_type] - - sql = ( - "SELECT * FROM %s" - " WHERE %s=? 
and ts=("
-                "  SELECT MAX(ts) FROM %s"
-                "  WHERE %s=?"
-                ")"
-            ) % (table, id_col, table, id_col)
-            txn.execute(sql, (stats_id, stats_id))
-            rows = self.cursor_to_dict(txn)
-            if len(rows) == 0:
-                # silently skip as we don't have anything to apply a delta to yet.
-                # this tries to minimise any race between the initial sync and
-                # subsequent deltas arriving.
-                return
-
-            current_ts = ts
-            latest_ts = rows[0]["ts"]
-            if current_ts < latest_ts:
-                # This one is in the past, but we're just encountering it now.
-                # Mark it as part of the current bucket.
-                current_ts = latest_ts
-            elif ts != latest_ts:
-                # we have to copy our absolute counters over to the new entry.
-                values = {
-                    key: rows[0][key] for key in ABSOLUTE_STATS_FIELDS[stats_type]
-                }
-                values[id_col] = stats_id
-                values["ts"] = ts
-                values["bucket_size"] = self.stats_bucket_size
-
-                self._simple_insert_txn(txn, table=table, values=values)
-
-            # actually update the new value
-            if stats_type in ABSOLUTE_STATS_FIELDS[stats_type]:
-                self._simple_update_txn(
-                    txn,
-                    table=table,
-                    keyvalues={id_col: stats_id, "ts": current_ts},
-                    updatevalues={field: value},
+    def _update_stats_delta_txn(
+        self,
+        txn,
+        ts,
+        stats_type,
+        stats_id,
+        fields,
+        complete_with_stream_id,
+        absolute_field_overrides=None,
+    ):
+        if absolute_field_overrides is None:
+            absolute_field_overrides = {}
+
+        table, id_col = TYPE_TO_TABLE[stats_type]
+
+        quantised_ts = self.quantise_stats_time(int(ts))
+        end_ts = quantised_ts + self.stats_bucket_size
+
+        # Let's be paranoid and check that all the given field names are known
+        abs_field_names = ABSOLUTE_STATS_FIELDS[stats_type]
+        slice_field_names = PER_SLICE_FIELDS[stats_type]
+        for field in chain(fields.keys(), absolute_field_overrides.keys()):
+            if field not in abs_field_names and field not in slice_field_names:
+                # guard against potential SQL injection dodginess
+                raise ValueError(
+                    "%s is not a recognised field"
+                    " for stats type %s" % (field, stats_type)
+                )
+
+        # Per slice fields do not get added to the _current table
+
+        # This calculates the deltas (`field = field + ?` values)
+        # for absolute fields,
+        # * defaulting to 0 if not specified
+        #   (required for the INSERT part of upserting to work)
+        # * omitting overrides specified in `absolute_field_overrides`
+        deltas_of_absolute_fields = {
+            key: fields.get(key, 0)
+            for key in abs_field_names
+            if key not in absolute_field_overrides
+        }
+
+        # Keep the delta stream ID field up to date
+        absolute_field_overrides = absolute_field_overrides.copy()
+        absolute_field_overrides["completed_delta_stream_id"] = complete_with_stream_id
+
+        # first upsert the `_current` table
+        self._upsert_with_additive_relatives_txn(
+            txn=txn,
+            table=table + "_current",
+            keyvalues={id_col: stats_id},
+            absolutes=absolute_field_overrides,
+            additive_relatives=deltas_of_absolute_fields,
+        )
+
+        per_slice_additive_relatives = {
+            key: fields.get(key, 0) for key in slice_field_names
+        }
+        self._upsert_copy_from_table_with_additive_relatives_txn(
+            txn=txn,
+            into_table=table + "_historical",
+            keyvalues={id_col: stats_id},
+            extra_dst_insvalues={"bucket_size": self.stats_bucket_size},
+            extra_dst_keyvalues={"end_ts": end_ts},
+            additive_relatives=per_slice_additive_relatives,
+            src_table=table + "_current",
+            copy_columns=abs_field_names,
+        )
+
+    def _upsert_with_additive_relatives_txn(
+        self, txn, table, keyvalues, absolutes, additive_relatives
+    ):
+        """Used to update values in the stats tables.
+
+        This is basically a slightly convoluted upsert that *adds* to any
+        existing rows.
+ + Args: + txn + table (str): Table name + keyvalues (dict[str, any]): Row-identifying key values + absolutes (dict[str, any]): Absolute (set) fields + additive_relatives (dict[str, int]): Fields that will be added onto + if existing row present. + """ + if self.database_engine.can_native_upsert: + absolute_updates = [ + "%(field)s = EXCLUDED.%(field)s" % {"field": field} + for field in absolutes.keys() + ] + + relative_updates = [ + "%(field)s = EXCLUDED.%(field)s + %(table)s.%(field)s" + % {"table": table, "field": field} + for field in additive_relatives.keys() + ] + + insert_cols = [] + qargs = [] + + for (key, val) in chain( + keyvalues.items(), absolutes.items(), additive_relatives.items() + ): + insert_cols.append(key) + qargs.append(val) + + sql = """ + INSERT INTO %(table)s (%(insert_cols_cs)s) + VALUES (%(insert_vals_qs)s) + ON CONFLICT (%(key_columns)s) DO UPDATE SET %(updates)s + """ % { + "table": table, + "insert_cols_cs": ", ".join(insert_cols), + "insert_vals_qs": ", ".join( + ["?"] * (len(keyvalues) + len(absolutes) + len(additive_relatives)) + ), + "key_columns": ", ".join(keyvalues), + "updates": ", ".join(chain(absolute_updates, relative_updates)), + } + + txn.execute(sql, qargs) + else: + self.database_engine.lock_table(txn, table) + retcols = list(chain(absolutes.keys(), additive_relatives.keys())) + current_row = self._simple_select_one_txn( + txn, table, keyvalues, retcols, allow_none=True + ) + if current_row is None: + merged_dict = {**keyvalues, **absolutes, **additive_relatives} + self._simple_insert_txn(txn, table, merged_dict) + else: + for (key, val) in additive_relatives.items(): + current_row[key] += val + current_row.update(absolutes) + self._simple_update_one_txn(txn, table, keyvalues, current_row) + + def _upsert_copy_from_table_with_additive_relatives_txn( + self, + txn, + into_table, + keyvalues, + extra_dst_keyvalues, + extra_dst_insvalues, + additive_relatives, + src_table, + copy_columns, + ): + """Updates the historic stats table with latest updates. + + This involves copying "absolute" fields from the `_current` table, and + adding relative fields to any existing values. + + Args: + txn: Transaction + into_table (str): The destination table to UPSERT the row into + keyvalues (dict[str, any]): Row-identifying key values + extra_dst_keyvalues (dict[str, any]): Additional keyvalues + for `into_table`. + extra_dst_insvalues (dict[str, any]): Additional values to insert + on new row creation for `into_table`. + additive_relatives (dict[str, any]): Fields that will be added onto + if existing row present. (Must be disjoint from copy_columns.) + src_table (str): The source table to copy from + copy_columns (iterable[str]): The list of columns to copy + """ + if self.database_engine.can_native_upsert: + ins_columns = chain( + keyvalues, + copy_columns, + additive_relatives, + extra_dst_keyvalues, + extra_dst_insvalues, + ) + sel_exprs = chain( + keyvalues, + copy_columns, + ( + "?" + for _ in chain( + additive_relatives, extra_dst_keyvalues, extra_dst_insvalues + ) + ), + ) + keyvalues_where = ("%s = ?" 
% f for f in keyvalues) + + sets_cc = ("%s = EXCLUDED.%s" % (f, f) for f in copy_columns) + sets_ar = ( + "%s = EXCLUDED.%s + %s.%s" % (f, f, into_table, f) + for f in additive_relatives + ) + + sql = """ + INSERT INTO %(into_table)s (%(ins_columns)s) + SELECT %(sel_exprs)s + FROM %(src_table)s + WHERE %(keyvalues_where)s + ON CONFLICT (%(keyvalues)s) + DO UPDATE SET %(sets)s + """ % { + "into_table": into_table, + "ins_columns": ", ".join(ins_columns), + "sel_exprs": ", ".join(sel_exprs), + "keyvalues_where": " AND ".join(keyvalues_where), + "src_table": src_table, + "keyvalues": ", ".join( + chain(keyvalues.keys(), extra_dst_keyvalues.keys()) + ), + "sets": ", ".join(chain(sets_cc, sets_ar)), + } + + qargs = list( + chain( + additive_relatives.values(), + extra_dst_keyvalues.values(), + extra_dst_insvalues.values(), + keyvalues.values(), + ) + ) + txn.execute(sql, qargs) + else: + self.database_engine.lock_table(txn, into_table) + src_row = self._simple_select_one_txn( + txn, src_table, keyvalues, copy_columns + ) + all_dest_keyvalues = {**keyvalues, **extra_dst_keyvalues} + dest_current_row = self._simple_select_one_txn( + txn, + into_table, + keyvalues=all_dest_keyvalues, + retcols=list(chain(additive_relatives.keys(), copy_columns)), + allow_none=True, + ) + + if dest_current_row is None: + merged_dict = { + **keyvalues, + **extra_dst_keyvalues, + **extra_dst_insvalues, + **src_row, + **additive_relatives, + } + self._simple_insert_txn(txn, into_table, merged_dict) else: - sql = ("UPDATE %s SET %s=%s+? WHERE %s=? AND ts=?") % ( - table, - field, - field, - id_col, + for (key, val) in additive_relatives.items(): + src_row[key] = dest_current_row[key] + val + self._simple_update_txn(txn, into_table, all_dest_keyvalues, src_row) + + def get_changes_room_total_events_and_bytes(self, min_pos, max_pos): + """Fetches the counts of events in the given range of stream IDs. + + Args: + min_pos (int) + max_pos (int) + + Returns: + Deferred[dict[str, dict[str, int]]]: Mapping of room ID to field + changes. + """ + + return self.runInteraction( + "stats_incremental_total_events_and_bytes", + self.get_changes_room_total_events_and_bytes_txn, + min_pos, + max_pos, + ) + + def get_changes_room_total_events_and_bytes_txn(self, txn, low_pos, high_pos): + """Gets the total_events and total_event_bytes counts for rooms and + senders, in a range of stream_orderings (including backfilled events). + + Args: + txn + low_pos (int): Low stream ordering + high_pos (int): High stream ordering + + Returns: + tuple[dict[str, dict[str, int]], dict[str, dict[str, int]]]: The + room and user deltas for total_events/total_event_bytes in the + format of `stats_id` -> fields + """ + + if low_pos >= high_pos: + # nothing to do here. + return {}, {} + + if isinstance(self.database_engine, PostgresEngine): + new_bytes_expression = "OCTET_LENGTH(json)" + else: + new_bytes_expression = "LENGTH(CAST(json AS BLOB))" + + sql = """ + SELECT events.room_id, COUNT(*) AS new_events, SUM(%s) AS new_bytes + FROM events INNER JOIN event_json USING (event_id) + WHERE (? < stream_ordering AND stream_ordering <= ?) + OR (? <= stream_ordering AND stream_ordering <= ?) 
+ GROUP BY events.room_id + """ % ( + new_bytes_expression, + ) + + txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos)) + + room_deltas = { + room_id: {"total_events": new_events, "total_event_bytes": new_bytes} + for room_id, new_events, new_bytes in txn + } + + sql = """ + SELECT events.sender, COUNT(*) AS new_events, SUM(%s) AS new_bytes + FROM events INNER JOIN event_json USING (event_id) + WHERE (? < stream_ordering AND stream_ordering <= ?) + OR (? <= stream_ordering AND stream_ordering <= ?) + GROUP BY events.sender + """ % ( + new_bytes_expression, + ) + + txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos)) + + user_deltas = { + user_id: {"total_events": new_events, "total_event_bytes": new_bytes} + for user_id, new_events, new_bytes in txn + if self.hs.is_mine_id(user_id) + } + + return room_deltas, user_deltas + + @defer.inlineCallbacks + def _calculate_and_set_initial_state_for_room(self, room_id): + """Calculate and insert an entry into room_stats_current. + + Args: + room_id (str) + + Returns: + Deferred[tuple[dict, dict, int]]: A tuple of room state, membership + counts and stream position. + """ + + def _fetch_current_state_stats(txn): + pos = self.get_room_max_stream_ordering() + + rows = self._simple_select_many_txn( + txn, + table="current_state_events", + column="type", + iterable=[ + EventTypes.Create, + EventTypes.JoinRules, + EventTypes.RoomHistoryVisibility, + EventTypes.Encryption, + EventTypes.Name, + EventTypes.Topic, + EventTypes.RoomAvatar, + EventTypes.CanonicalAlias, + ], + keyvalues={"room_id": room_id, "state_key": ""}, + retcols=["event_id"], + ) + + event_ids = [row["event_id"] for row in rows] + + txn.execute( + """ + SELECT membership, count(*) FROM current_state_events + WHERE room_id = ? AND type = 'm.room.member' + GROUP BY membership + """, + (room_id,), + ) + membership_counts = {membership: cnt for membership, cnt in txn} + + txn.execute( + """ + SELECT COALESCE(count(*), 0) FROM current_state_events + WHERE room_id = ? 
+ """, + (room_id,), + ) + + current_state_events_count, = txn.fetchone() + + users_in_room = self.get_users_in_room_txn(txn, room_id) + + return ( + event_ids, + membership_counts, + current_state_events_count, + users_in_room, + pos, + ) + + ( + event_ids, + membership_counts, + current_state_events_count, + users_in_room, + pos, + ) = yield self.runInteraction( + "get_initial_state_for_room", _fetch_current_state_stats + ) + + state_event_map = yield self.get_events(event_ids, get_prev_content=False) + + room_state = { + "join_rules": None, + "history_visibility": None, + "encryption": None, + "name": None, + "topic": None, + "avatar": None, + "canonical_alias": None, + "is_federatable": True, + } + + for event in state_event_map.values(): + if event.type == EventTypes.JoinRules: + room_state["join_rules"] = event.content.get("join_rule") + elif event.type == EventTypes.RoomHistoryVisibility: + room_state["history_visibility"] = event.content.get( + "history_visibility" ) - txn.execute(sql, (value, stats_id, current_ts)) + elif event.type == EventTypes.Encryption: + room_state["encryption"] = event.content.get("algorithm") + elif event.type == EventTypes.Name: + room_state["name"] = event.content.get("name") + elif event.type == EventTypes.Topic: + room_state["topic"] = event.content.get("topic") + elif event.type == EventTypes.RoomAvatar: + room_state["avatar"] = event.content.get("url") + elif event.type == EventTypes.CanonicalAlias: + room_state["canonical_alias"] = event.content.get("alias") + elif event.type == EventTypes.Create: + room_state["is_federatable"] = event.content.get("m.federate", True) + + yield self.update_room_state(room_id, room_state) + + local_users_in_room = [u for u in users_in_room if self.hs.is_mine_id(u)] + + yield self.update_stats_delta( + ts=self.clock.time_msec(), + stats_type="room", + stats_id=room_id, + fields={}, + complete_with_stream_id=pos, + absolute_field_overrides={ + "current_state_events": current_state_events_count, + "joined_members": membership_counts.get(Membership.JOIN, 0), + "invited_members": membership_counts.get(Membership.INVITE, 0), + "left_members": membership_counts.get(Membership.LEAVE, 0), + "banned_members": membership_counts.get(Membership.BAN, 0), + "local_users_in_room": len(local_users_in_room), + }, + ) + + @defer.inlineCallbacks + def _calculate_and_set_initial_state_for_user(self, user_id): + def _calculate_and_set_initial_state_for_user_txn(txn): + pos = self._get_max_stream_id_in_current_state_deltas_txn(txn) - return self.runInteraction("update_stats_delta", _update_stats_delta) + txn.execute( + """ + SELECT COUNT(distinct room_id) FROM current_state_events + WHERE type = 'm.room.member' AND state_key = ? + AND membership = 'join' + """, + (user_id,), + ) + count, = txn.fetchone() + return count, pos + + joined_rooms, pos = yield self.runInteraction( + "calculate_and_set_initial_state_for_user", + _calculate_and_set_initial_state_for_user_txn, + ) + + yield self.update_stats_delta( + ts=self.clock.time_msec(), + stats_type="user", + stats_id=user_id, + fields={}, + complete_with_stream_id=pos, + absolute_field_overrides={"joined_rooms": joined_rooms}, + ) diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index a8b858eb4f..7569b6fab5 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -13,16 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from mock import Mock
-
-from twisted.internet import defer
-
-from synapse.api.constants import EventTypes, Membership
+from synapse import storage
 from synapse.rest import admin
 from synapse.rest.client.v1 import login, room
 
 from tests import unittest
 
+# The expected number of state events in a fresh public room.
+EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM = 5
+# The expected number of state events in a fresh private room.
+EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM = 6
+
 
 class StatsRoomTests(unittest.HomeserverTestCase):
 
@@ -33,7 +34,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
     ]
 
     def prepare(self, reactor, clock, hs):
-
         self.store = hs.get_datastore()
         self.handler = self.hs.get_stats_handler()
 
@@ -47,7 +47,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.get_success(
             self.store._simple_insert(
                 "background_updates",
-                {"update_name": "populate_stats_createtables", "progress_json": "{}"},
+                {"update_name": "populate_stats_prepare", "progress_json": "{}"},
             )
         )
@@ -56,7 +56,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
                 {
                     "update_name": "populate_stats_process_rooms",
                     "progress_json": "{}",
-                    "depends_on": "populate_stats_createtables",
+                    "depends_on": "populate_stats_prepare",
                 },
             )
         )
@@ -64,18 +64,58 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             self.store._simple_insert(
                 "background_updates",
                 {
-                    "update_name": "populate_stats_cleanup",
+                    "update_name": "populate_stats_process_users",
                     "progress_json": "{}",
                     "depends_on": "populate_stats_process_rooms",
                 },
             )
         )
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {
+                    "update_name": "populate_stats_cleanup",
+                    "progress_json": "{}",
+                    "depends_on": "populate_stats_process_users",
+                },
+            )
+        )
+
+    def get_all_room_state(self):
+        return self.store._simple_select_list(
+            "room_stats_state", None, retcols=("name", "topic", "canonical_alias")
+        )
+
+    def _get_current_stats(self, stats_type, stat_id):
+        table, id_col = storage.stats.TYPE_TO_TABLE[stats_type]
+
+        cols = list(storage.stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list(
+            storage.stats.PER_SLICE_FIELDS[stats_type]
+        )
+
+        end_ts = self.store.quantise_stats_time(self.reactor.seconds() * 1000)
+
+        return self.get_success(
+            self.store._simple_select_one(
+                table + "_historical",
+                {id_col: stat_id, "end_ts": end_ts},
+                cols,
+                allow_none=True,
+            )
+        )
+
+    def _perform_background_initial_update(self):
+        # Do the initial population of the stats via the background update
+        self._add_background_updates()
+
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
 
     def test_initial_room(self):
         """
         The background updates will build the table from scratch.
""" - r = self.get_success(self.store.get_all_room_state()) + r = self.get_success(self.get_all_room_state()) self.assertEqual(len(r), 0) # Disable stats @@ -91,7 +131,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): ) # Stats disabled, shouldn't have done anything - r = self.get_success(self.store.get_all_room_state()) + r = self.get_success(self.get_all_room_state()) self.assertEqual(len(r), 0) # Enable stats @@ -104,7 +144,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): while not self.get_success(self.store.has_completed_background_updates()): self.get_success(self.store.do_next_background_update(100), by=0.1) - r = self.get_success(self.store.get_all_room_state()) + r = self.get_success(self.get_all_room_state()) self.assertEqual(len(r), 1) self.assertEqual(r[0]["topic"], "foo") @@ -114,6 +154,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): Ingestion via notify_new_event will ignore tokens that the background update have already processed. """ + self.reactor.advance(86401) self.hs.config.stats_enabled = False @@ -138,12 +179,18 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.hs.config.stats_enabled = True self.handler.stats_enabled = True self.store._all_done = False - self.get_success(self.store.update_stats_stream_pos(None)) + self.get_success( + self.store._simple_update_one( + table="stats_incremental_position", + keyvalues={}, + updatevalues={"stream_id": 0}, + ) + ) self.get_success( self.store._simple_insert( "background_updates", - {"update_name": "populate_stats_createtables", "progress_json": "{}"}, + {"update_name": "populate_stats_prepare", "progress_json": "{}"}, ) ) @@ -154,6 +201,8 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token) self.helper.join(room=room_1, user=u2, tok=u2_token) + # orig_delta_processor = self.store. + # Now do the initial ingestion. self.get_success( self.store._simple_insert( @@ -185,8 +234,15 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token) self.helper.join(room=room_1, user=u3, tok=u3_token) - # Get the deltas! There should be two -- day 1, and day 2. - r = self.get_success(self.store.get_deltas_for_room(room_1, 0)) + # self.handler.notify_new_event() + + # We need to let the delta processor advance… + self.pump(10 * 60) + + # Get the slices! There should be two -- day 1, and day 2. + r = self.get_success(self.store.get_statistics_for_subject("room", room_1, 0)) + + self.assertEqual(len(r), 2) # The oldest has 2 joined members self.assertEqual(r[-1]["joined_members"], 2) @@ -194,111 +250,476 @@ class StatsRoomTests(unittest.HomeserverTestCase): # The newest has 3 self.assertEqual(r[0]["joined_members"], 3) - def test_incorrect_state_transition(self): - """ - If the state transition is not one of (JOIN, INVITE, LEAVE, BAN) to - (JOIN, INVITE, LEAVE, BAN), an error is raised. 
- """ - events = { - "a1": {"membership": Membership.LEAVE}, - "a2": {"membership": "not a real thing"}, - } - - def get_event(event_id, allow_none=True): - m = Mock() - m.content = events[event_id] - d = defer.Deferred() - self.reactor.callLater(0.0, d.callback, m) - return d - - def get_received_ts(event_id): - return defer.succeed(1) - - self.store.get_received_ts = get_received_ts - self.store.get_event = get_event - - deltas = [ - { - "type": EventTypes.Member, - "state_key": "some_user", - "room_id": "room", - "event_id": "a1", - "prev_event_id": "a2", - "stream_id": 60, - } - ] - - f = self.get_failure(self.handler._handle_deltas(deltas), ValueError) + def test_create_user(self): + """ + When we create a user, it should have statistics already ready. + """ + + u1 = self.register_user("u1", "pass") + + u1stats = self._get_current_stats("user", u1) + + self.assertIsNotNone(u1stats) + + # not in any rooms by default + self.assertEqual(u1stats["joined_rooms"], 0) + + def test_create_room(self): + """ + When we create a room, it should have statistics already ready. + """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + r1stats = self._get_current_stats("room", r1) + r2 = self.helper.create_room_as(u1, tok=u1token, is_public=False) + r2stats = self._get_current_stats("room", r2) + + self.assertIsNotNone(r1stats) + self.assertIsNotNone(r2stats) + + # contains the default things you'd expect in a fresh room self.assertEqual( - f.value.args[0], "'not a real thing' is not a valid prev_membership" - ) - - # And the other way... - deltas = [ - { - "type": EventTypes.Member, - "state_key": "some_user", - "room_id": "room", - "event_id": "a2", - "prev_event_id": "a1", - "stream_id": 100, - } - ] - - f = self.get_failure(self.handler._handle_deltas(deltas), ValueError) + r1stats["total_events"], + EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM, + "Wrong number of total_events in new room's stats!" + " You may need to update this if more state events are added to" + " the room creation process.", + ) self.assertEqual( - f.value.args[0], "'not a real thing' is not a valid membership" + r2stats["total_events"], + EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM, + "Wrong number of total_events in new room's stats!" + " You may need to update this if more state events are added to" + " the room creation process.", ) - def test_redacted_prev_event(self): + self.assertEqual( + r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM + ) + self.assertEqual( + r2stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM + ) + + self.assertEqual(r1stats["joined_members"], 1) + self.assertEqual(r1stats["invited_members"], 0) + self.assertEqual(r1stats["banned_members"], 0) + + self.assertEqual(r2stats["joined_members"], 1) + self.assertEqual(r2stats["invited_members"], 0) + self.assertEqual(r2stats["banned_members"], 0) + + def test_send_message_increments_total_events(self): """ - If the prev_event does not exist, then it is assumed to be a LEAVE. + When we send a message, it increments total_events. 
""" + + self._perform_background_initial_update() + u1 = self.register_user("u1", "pass") - u1_token = self.login("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + r1stats_ante = self._get_current_stats("room", r1) - room_1 = self.helper.create_room_as(u1, tok=u1_token) + self.helper.send(r1, "hiss", tok=u1token) - # Do the initial population of the user directory via the background update - self._add_background_updates() + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + + def test_send_state_event_nonoverwriting(self): + """ + When we send a non-overwriting state event, it increments total_events AND current_state_events + """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + + self.helper.send_state( + r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby" + ) + + r1stats_ante = self._get_current_stats("room", r1) + + self.helper.send_state( + r1, "cat.hissing", {"value": False}, tok=u1token, state_key="moggy" + ) + + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + self.assertEqual( + r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], + 1, + ) + + def test_send_state_event_overwriting(self): + """ + When we send an overwriting state event, it increments total_events ONLY + """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + + self.helper.send_state( + r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby" + ) + + r1stats_ante = self._get_current_stats("room", r1) + + self.helper.send_state( + r1, "cat.hissing", {"value": False}, tok=u1token, state_key="tabby" + ) + + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + self.assertEqual( + r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], + 0, + ) + + def test_join_first_time(self): + """ + When a user joins a room for the first time, total_events, current_state_events and + joined_members should increase by exactly 1. + """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + + u2 = self.register_user("u2", "pass") + u2token = self.login("u2", "pass") + + r1stats_ante = self._get_current_stats("room", r1) + + self.helper.join(r1, u2, tok=u2token) + + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + self.assertEqual( + r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], + 1, + ) + self.assertEqual( + r1stats_post["joined_members"] - r1stats_ante["joined_members"], 1 + ) + + def test_join_after_leave(self): + """ + When a user joins a room after being previously left, total_events and + joined_members should increase by exactly 1. + current_state_events should not increase. + left_members should decrease by exactly 1. 
+ """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + + u2 = self.register_user("u2", "pass") + u2token = self.login("u2", "pass") + + self.helper.join(r1, u2, tok=u2token) + self.helper.leave(r1, u2, tok=u2token) + + r1stats_ante = self._get_current_stats("room", r1) + + self.helper.join(r1, u2, tok=u2token) + + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + self.assertEqual( + r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], + 0, + ) + self.assertEqual( + r1stats_post["joined_members"] - r1stats_ante["joined_members"], +1 + ) + self.assertEqual( + r1stats_post["left_members"] - r1stats_ante["left_members"], -1 + ) + + def test_invited(self): + """ + When a user invites another user, current_state_events, total_events and + invited_members should increase by exactly 1. + """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + + u2 = self.register_user("u2", "pass") + + r1stats_ante = self._get_current_stats("room", r1) + + self.helper.invite(r1, u1, u2, tok=u1token) + + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + self.assertEqual( + r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], + 1, + ) + self.assertEqual( + r1stats_post["invited_members"] - r1stats_ante["invited_members"], +1 + ) + + def test_join_after_invite(self): + """ + When a user joins a room after being invited, total_events and + joined_members should increase by exactly 1. + current_state_events should not increase. + invited_members should decrease by exactly 1. + """ + + self._perform_background_initial_update() + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token) + + u2 = self.register_user("u2", "pass") + u2token = self.login("u2", "pass") + + self.helper.invite(r1, u1, u2, tok=u1token) + + r1stats_ante = self._get_current_stats("room", r1) + + self.helper.join(r1, u2, tok=u2token) + + r1stats_post = self._get_current_stats("room", r1) + + self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) + self.assertEqual( + r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], + 0, + ) + self.assertEqual( + r1stats_post["joined_members"] - r1stats_ante["joined_members"], +1 + ) + self.assertEqual( + r1stats_post["invited_members"] - r1stats_ante["invited_members"], -1 + ) + + def test_left(self): + """ + When a user leaves a room after joining, total_events and + left_members should increase by exactly 1. + current_state_events should not increase. + joined_members should decrease by exactly 1. 
+        """
+
+        self._perform_background_initial_update()
+
+        u1 = self.register_user("u1", "pass")
+        u1token = self.login("u1", "pass")
+        r1 = self.helper.create_room_as(u1, tok=u1token)
+
+        u2 = self.register_user("u2", "pass")
+        u2token = self.login("u2", "pass")
+
+        self.helper.join(r1, u2, tok=u2token)
+
+        r1stats_ante = self._get_current_stats("room", r1)
+
+        self.helper.leave(r1, u2, tok=u2token)
+
+        r1stats_post = self._get_current_stats("room", r1)
+
+        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
+        self.assertEqual(
+            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
+            0,
+        )
+        self.assertEqual(
+            r1stats_post["left_members"] - r1stats_ante["left_members"], +1
+        )
+        self.assertEqual(
+            r1stats_post["joined_members"] - r1stats_ante["joined_members"], -1
+        )
+
+    def test_banned(self):
+        """
+        When a user is banned from a room after joining, total_events and
+        banned_members should increase by exactly 1.
+        current_state_events should not increase.
+        joined_members should decrease by exactly 1.
+        """
+
+        self._perform_background_initial_update()
+
+        u1 = self.register_user("u1", "pass")
+        u1token = self.login("u1", "pass")
+        r1 = self.helper.create_room_as(u1, tok=u1token)
+
+        u2 = self.register_user("u2", "pass")
+        u2token = self.login("u2", "pass")
+
+        self.helper.join(r1, u2, tok=u2token)
+
+        r1stats_ante = self._get_current_stats("room", r1)
+
+        self.helper.change_membership(r1, u1, u2, "ban", tok=u1token)
+
+        r1stats_post = self._get_current_stats("room", r1)
+
+        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
+        self.assertEqual(
+            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
+            0,
+        )
+        self.assertEqual(
+            r1stats_post["banned_members"] - r1stats_ante["banned_members"], +1
+        )
+        self.assertEqual(
+            r1stats_post["joined_members"] - r1stats_ante["joined_members"], -1
+        )
+
+    def test_initial_background_update(self):
+        """
+        Test that statistics can be generated by the initial background update
+        handler.
+
+        This test also tests that stats rows are not created for new subjects
+        when stats are disabled. However, it may be desirable to change this
+        behaviour eventually to still keep current rows.
+        """
+
+        self.hs.config.stats_enabled = False
+
+        u1 = self.register_user("u1", "pass")
+        u1token = self.login("u1", "pass")
+        r1 = self.helper.create_room_as(u1, tok=u1token)
+
+        # test that these subjects, which were created during a time of disabled
+        # stats, do not have stats.
+        self.assertIsNone(self._get_current_stats("room", r1))
+        self.assertIsNone(self._get_current_stats("user", u1))
+
+        self.hs.config.stats_enabled = True
+
+        self._perform_background_initial_update()
+
+        r1stats = self._get_current_stats("room", r1)
+        u1stats = self._get_current_stats("user", u1)
+
+        self.assertEqual(r1stats["joined_members"], 1)
+        self.assertEqual(
+            r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
+        )
+
+        self.assertEqual(u1stats["joined_rooms"], 1)
+
+    def test_incomplete_stats(self):
+        """
+        This tests that we track incomplete statistics.
+
+        We first test that incomplete stats are incrementally generated,
+        following the preparation of a background regen.
+
+        We then test that these incomplete rows are completed by the background
+        regen.
+ """ + + u1 = self.register_user("u1", "pass") + u1token = self.login("u1", "pass") + u2 = self.register_user("u2", "pass") + u2token = self.login("u2", "pass") + u3 = self.register_user("u3", "pass") + r1 = self.helper.create_room_as(u1, tok=u1token, is_public=False) + + # preparation stage of the initial background update + # Ugh, have to reset this flag + self.store._all_done = False + + self.get_success( + self.store._simple_delete( + "room_stats_current", {"1": 1}, "test_delete_stats" + ) + ) + self.get_success( + self.store._simple_delete( + "user_stats_current", {"1": 1}, "test_delete_stats" + ) + ) + + self.helper.invite(r1, u1, u2, tok=u1token) + self.helper.join(r1, u2, tok=u2token) + self.helper.invite(r1, u1, u3, tok=u1token) + self.helper.send(r1, "thou shalt yield", tok=u1token) + + # now do the background updates + + self.store._all_done = False + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_stats_process_rooms", + "progress_json": "{}", + "depends_on": "populate_stats_prepare", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_stats_process_users", + "progress_json": "{}", + "depends_on": "populate_stats_process_rooms", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_stats_cleanup", + "progress_json": "{}", + "depends_on": "populate_stats_process_users", + }, + ) + ) while not self.get_success(self.store.has_completed_background_updates()): self.get_success(self.store.do_next_background_update(100), by=0.1) - events = {"a1": None, "a2": {"membership": Membership.JOIN}} - - def get_event(event_id, allow_none=True): - if events.get(event_id): - m = Mock() - m.content = events[event_id] - else: - m = None - d = defer.Deferred() - self.reactor.callLater(0.0, d.callback, m) - return d - - def get_received_ts(event_id): - return defer.succeed(1) - - self.store.get_received_ts = get_received_ts - self.store.get_event = get_event - - deltas = [ - { - "type": EventTypes.Member, - "state_key": "some_user:test", - "room_id": room_1, - "event_id": "a2", - "prev_event_id": "a1", - "stream_id": 100, - } - ] - - # Handle our fake deltas, which has a user going from LEAVE -> JOIN. - self.get_success(self.handler._handle_deltas(deltas)) - - # One delta, with two joined members -- the room creator, and our fake - # user. 
- r = self.get_success(self.store.get_deltas_for_room(room_1, 0)) - self.assertEqual(len(r), 1) - self.assertEqual(r[0]["joined_members"], 2) + r1stats_complete = self._get_current_stats("room", r1) + u1stats_complete = self._get_current_stats("user", u1) + u2stats_complete = self._get_current_stats("user", u2) + + # now we make our assertions + + # check that _complete rows are complete and correct + self.assertEqual(r1stats_complete["joined_members"], 2) + self.assertEqual(r1stats_complete["invited_members"], 1) + + self.assertEqual( + r1stats_complete["current_state_events"], + 2 + EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM, + ) + + self.assertEqual(u1stats_complete["joined_rooms"], 1) + self.assertEqual(u2stats_complete["joined_rooms"], 1) diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py index 9915367144..cdded88b7f 100644 --- a/tests/rest/client/v1/utils.py +++ b/tests/rest/client/v1/utils.py @@ -128,8 +128,12 @@ class RestHelper(object): return channel.json_body - def send_state(self, room_id, event_type, body, tok, expect_code=200): - path = "/_matrix/client/r0/rooms/%s/state/%s" % (room_id, event_type) + def send_state(self, room_id, event_type, body, tok, expect_code=200, state_key=""): + path = "/_matrix/client/r0/rooms/%s/state/%s/%s" % ( + room_id, + event_type, + state_key, + ) if tok: path = path + "?access_token=%s" % tok -- cgit 1.4.1 From b09d443632d699954677b4b56d4249826a3f6524 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 4 Sep 2019 16:16:56 +0100 Subject: Cleanup event auth type initialisation (#5975) Very small code cleanup. --- changelog.d/5975.misc | 1 + synapse/event_auth.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/5975.misc diff --git a/changelog.d/5975.misc b/changelog.d/5975.misc new file mode 100644 index 0000000000..5fcd229b89 --- /dev/null +++ b/changelog.d/5975.misc @@ -0,0 +1 @@ +Cleanup event auth type initialisation. \ No newline at end of file diff --git a/synapse/event_auth.py b/synapse/event_auth.py index cd52e3f867..4e91df60e6 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -637,11 +637,11 @@ def auth_types_for_event(event): if event.type == EventTypes.Create: return [] - auth_types = [] - - auth_types.append((EventTypes.PowerLevels, "")) - auth_types.append((EventTypes.Member, event.sender)) - auth_types.append((EventTypes.Create, "")) + auth_types = [ + (EventTypes.PowerLevels, ""), + (EventTypes.Member, event.sender), + (EventTypes.Create, ""), + ] if event.type == EventTypes.Member: membership = event.content["membership"] -- cgit 1.4.1 From b736c6cd3a67901d8b094acb26b3649b46e51931 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 4 Sep 2019 18:24:23 +0100 Subject: Remove bind_email and bind_msisdn (#5964) Removes the `bind_email` and `bind_msisdn` parameters from the `/register` C/S API endpoint as per [MSC2140: Terms of Service for ISes and IMs](https://github.com/matrix-org/matrix-doc/pull/2140/files#diff-c03a26de5ac40fb532de19cb7fc2aaf7R107). 
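As a sketch of the client-visible change (a hypothetical request body; the
username and password values below are placeholders, not taken from this patch):

    # Before this change, a client could ask the homeserver to bind the
    # threepid on the identity server as part of registration:
    register_body = {
        "username": "alice",   # placeholder
        "password": "s3cret",  # placeholder
        "bind_email": True,    # no longer read by /register after this change
        "bind_msisdn": False,  # no longer read by /register after this change
    }
    # After this change, a client wanting the threepid bound must request the
    # binding from the identity server directly, per MSC2140.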
--- changelog.d/5964.feature | 1 + synapse/handlers/register.py | 50 ++++---------------------------- synapse/replication/http/register.py | 21 ++------------ synapse/rest/client/v2_alpha/register.py | 2 -- 4 files changed, 10 insertions(+), 64 deletions(-) create mode 100644 changelog.d/5964.feature diff --git a/changelog.d/5964.feature b/changelog.d/5964.feature new file mode 100644 index 0000000000..273c9df026 --- /dev/null +++ b/changelog.d/5964.feature @@ -0,0 +1 @@ +Remove `bind_email` and `bind_msisdn` parameters from /register ala MSC2140. \ No newline at end of file diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index e59b2a3684..975da57ffd 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -543,9 +543,7 @@ class RegistrationHandler(BaseHandler): return (device_id, access_token) @defer.inlineCallbacks - def post_registration_actions( - self, user_id, auth_result, access_token, bind_email, bind_msisdn - ): + def post_registration_actions(self, user_id, auth_result, access_token): """A user has completed registration Args: @@ -554,18 +552,10 @@ class RegistrationHandler(BaseHandler): registered user. access_token (str|None): The access token of the newly logged in device, or None if `inhibit_login` enabled. - bind_email (bool): Whether to bind the email with the identity - server. - bind_msisdn (bool): Whether to bind the msisdn with the identity - server. """ if self.hs.config.worker_app: yield self._post_registration_client( - user_id=user_id, - auth_result=auth_result, - access_token=access_token, - bind_email=bind_email, - bind_msisdn=bind_msisdn, + user_id=user_id, auth_result=auth_result, access_token=access_token ) return @@ -578,13 +568,11 @@ class RegistrationHandler(BaseHandler): ): yield self.store.upsert_monthly_active_user(user_id) - yield self._register_email_threepid( - user_id, threepid, access_token, bind_email - ) + yield self._register_email_threepid(user_id, threepid, access_token) if auth_result and LoginType.MSISDN in auth_result: threepid = auth_result[LoginType.MSISDN] - yield self._register_msisdn_threepid(user_id, threepid, bind_msisdn) + yield self._register_msisdn_threepid(user_id, threepid) if auth_result and LoginType.TERMS in auth_result: yield self._on_user_consented(user_id, self.hs.config.user_consent_version) @@ -603,14 +591,12 @@ class RegistrationHandler(BaseHandler): yield self.post_consent_actions(user_id) @defer.inlineCallbacks - def _register_email_threepid(self, user_id, threepid, token, bind_email): + def _register_email_threepid(self, user_id, threepid, token): """Add an email address as a 3pid identifier Also adds an email pusher for the email address, if configured in the HS config - Also optionally binds emails to the given user_id on the identity server - Must be called on master. Args: @@ -618,8 +604,6 @@ class RegistrationHandler(BaseHandler): threepid (object): m.login.email.identity auth response token (str|None): access_token for the user, or None if not logged in. 
- bind_email (bool): true if the client requested the email to be - bound at the identity server Returns: defer.Deferred: """ @@ -661,28 +645,15 @@ class RegistrationHandler(BaseHandler): data={}, ) - if bind_email: - logger.info("bind_email specified: binding") - logger.debug("Binding emails %s to %s" % (threepid, user_id)) - yield self.identity_handler.bind_threepid( - threepid["threepid_creds"], user_id - ) - else: - logger.info("bind_email not specified: not binding email") - @defer.inlineCallbacks - def _register_msisdn_threepid(self, user_id, threepid, bind_msisdn): + def _register_msisdn_threepid(self, user_id, threepid): """Add a phone number as a 3pid identifier - Also optionally binds msisdn to the given user_id on the identity server - Must be called on master. Args: user_id (str): id of user threepid (object): m.login.msisdn auth response - bind_msisdn (bool): true if the client requested the msisdn to be - bound at the identity server Returns: defer.Deferred: """ @@ -698,12 +669,3 @@ class RegistrationHandler(BaseHandler): yield self._auth_handler.add_threepid( user_id, threepid["medium"], threepid["address"], threepid["validated_at"] ) - - if bind_msisdn: - logger.info("bind_msisdn specified: binding") - logger.debug("Binding msisdn %s to %s", threepid, user_id) - yield self.identity_handler.bind_threepid( - threepid["threepid_creds"], user_id - ) - else: - logger.info("bind_msisdn not specified: not binding msisdn") diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 87fe2dd9b0..38260256cf 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -106,7 +106,7 @@ class ReplicationPostRegisterActionsServlet(ReplicationEndpoint): self.registration_handler = hs.get_registration_handler() @staticmethod - def _serialize_payload(user_id, auth_result, access_token, bind_email, bind_msisdn): + def _serialize_payload(user_id, auth_result, access_token): """ Args: user_id (str): The user ID that consented @@ -114,17 +114,8 @@ class ReplicationPostRegisterActionsServlet(ReplicationEndpoint): registered user. access_token (str|None): The access token of the newly logged in device, or None if `inhibit_login` enabled. 
- bind_email (bool): Whether to bind the email with the identity - server - bind_msisdn (bool): Whether to bind the msisdn with the identity - server """ - return { - "auth_result": auth_result, - "access_token": access_token, - "bind_email": bind_email, - "bind_msisdn": bind_msisdn, - } + return {"auth_result": auth_result, "access_token": access_token} @defer.inlineCallbacks def _handle_request(self, request, user_id): @@ -132,15 +123,9 @@ class ReplicationPostRegisterActionsServlet(ReplicationEndpoint): auth_result = content["auth_result"] access_token = content["access_token"] - bind_email = content["bind_email"] - bind_msisdn = content["bind_msisdn"] yield self.registration_handler.post_registration_actions( - user_id=user_id, - auth_result=auth_result, - access_token=access_token, - bind_email=bind_email, - bind_msisdn=bind_msisdn, + user_id=user_id, auth_result=auth_result, access_token=access_token ) return 200, {} diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 107854c669..1ccd2bed2f 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -481,8 +481,6 @@ class RegisterRestServlet(RestServlet): user_id=registered_user_id, auth_result=auth_result, access_token=return_dict.get("access_token"), - bind_email=params.get("bind_email"), - bind_msisdn=params.get("bind_msisdn"), ) return 200, return_dict -- cgit 1.4.1 From a22d58c96c714e5f97b3e68f3ec7f2aeee854a81 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 4 Sep 2019 19:32:35 -0400 Subject: add user signature stream change cache to slaved device store --- synapse/replication/slave/storage/devices.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index d9300fce33..f045e1b937 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -33,6 +33,9 @@ class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedSto self._device_list_stream_cache = StreamChangeCache( "DeviceListStreamChangeCache", device_list_max ) + self._user_signature_stream_cache = StreamChangeCache( + "UserSignatureStreamChangeCache", device_list_max + ) self._device_list_federation_stream_cache = StreamChangeCache( "DeviceListFederationStreamChangeCache", device_list_max ) -- cgit 1.4.1 From 4bb454478470c6b707d33292113ac3a23010db8b Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 22 May 2019 16:41:24 -0400 Subject: implement device signature uploading/fetching --- synapse/handlers/e2e_keys.py | 250 +++++++++++++++++++++++++++++++++++ synapse/rest/client/v2_alpha/keys.py | 50 +++++++ synapse/storage/end_to_end_keys.py | 38 ++++++ 3 files changed, 338 insertions(+) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 997ad66f8f..9747b517ff 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -608,6 +608,194 @@ class E2eKeysHandler(object): return {} + @defer.inlineCallbacks + def upload_signatures_for_device_keys(self, user_id, signatures): + """Upload device signatures for cross-signing + + Args: + user_id (string): the user uploading the signatures + signatures (dict[string, dict[string, dict]]): map of users to + devices to signed keys + """ + failures = {} + + signature_list = [] # signatures to be stored + self_device_ids = [] # what devices have been updated, for notifying + + # split between checking signatures for own user and 
signatures for
+        # other users, since we verify them with different keys
+        if user_id in signatures:
+            self_signatures = signatures[user_id]
+            del signatures[user_id]
+            self_device_ids = list(self_signatures.keys())
+            try:
+                # get our self-signing key to verify the signatures
+                self_signing_key = yield self.store.get_e2e_cross_signing_key(
+                    user_id, "self_signing"
+                )
+                if self_signing_key is None:
+                    raise SynapseError(
+                        404,
+                        "No self-signing key found",
+                        Codes.NOT_FOUND
+                    )
+
+                self_signing_key_id, self_signing_verify_key \
+                    = get_verify_key_from_cross_signing_key(self_signing_key)
+
+                # fetch our stored devices so that we can compare with what was sent
+                user_devices = []
+                for device in self_signatures.keys():
+                    user_devices.append((user_id, device))
+                devices = yield self.store.get_e2e_device_keys(user_devices)
+
+                if user_id not in devices:
+                    raise SynapseError(
+                        404,
+                        "No device key found",
+                        Codes.NOT_FOUND
+                    )
+
+                devices = devices[user_id]
+                for device_id, device in self_signatures.items():
+                    try:
+                        if ("signatures" not in device or
+                                user_id not in device["signatures"] or
+                                self_signing_key_id not in device["signatures"][user_id]):
+                            # no signature was sent
+                            raise SynapseError(
+                                400,
+                                "Invalid signature",
+                                Codes.INVALID_SIGNATURE
+                            )
+
+                        stored_device = devices[device_id]["keys"]
+                        if self_signing_key_id in stored_device.get("signatures", {}) \
+                                .get(user_id, {}):
+                            # we already have a signature on this device, so we
+                            # can skip it, since it should be exactly the same
+                            continue
+
+                        _check_device_signature(
+                            user_id, self_signing_verify_key, device, stored_device
+                        )
+
+                        signature = device["signatures"][user_id][self_signing_key_id]
+                        signature_list.append(
+                            (self_signing_key_id, user_id, device_id, signature)
+                        )
+                    except SynapseError as e:
+                        failures.setdefault(user_id, {})[device_id] \
+                            = _exception_to_failure(e)
+            except SynapseError as e:
+                failures[user_id] = {
+                    device: _exception_to_failure(e)
+                    for device in self_signatures.keys()
+                }
+
+        signed_users = []  # which users have been signed, for notifying
+        if len(signatures):
+            # if signatures isn't empty, then we have signatures for other
+            # users.
These signatures will be signed by the user signing key
+
+            # get our user-signing key to verify the signatures
+            user_signing_key = yield self.store.get_e2e_cross_signing_key(
+                user_id, "user_signing"
+            )
+            if user_signing_key is None:
+                for user, devicemap in signatures.items():
+                    failures[user] = {
+                        device: _exception_to_failure(SynapseError(
+                            404,
+                            "No user-signing key found",
+                            Codes.NOT_FOUND
+                        ))
+                        for device in devicemap.keys()
+                    }
+            else:
+                user_signing_key_id, user_signing_verify_key \
+                    = get_verify_key_from_cross_signing_key(user_signing_key)
+
+                for user, devicemap in signatures.items():
+                    device_id = None
+                    try:
+                        # get the user's master key, to make sure it matches
+                        # what was sent
+                        stored_key = yield self.store.get_e2e_cross_signing_key(
+                            user, "master", user_id
+                        )
+                        if stored_key is None:
+                            logger.error(
+                                "upload signature: no user key found for %s", user
+                            )
+                            raise SynapseError(
+                                404,
+                                "User's master key not found",
+                                Codes.NOT_FOUND
+                            )
+
+                        # make sure that the user's master key is the one that
+                        # was signed (and no others)
+                        device_id = get_verify_key_from_cross_signing_key(stored_key)[0] \
+                            .split(":", 1)[1]
+                        if device_id not in devicemap:
+                            logger.error(
+                                "upload signature: wrong device: %s vs %s",
+                                device_id, devicemap
+                            )
+                            raise SynapseError(
+                                404,
+                                "Unknown device",
+                                Codes.NOT_FOUND
+                            )
+                        if len(devicemap) > 1:
+                            logger.error("upload signature: too many devices specified")
+                            failures[user] = {
+                                device: _exception_to_failure(SynapseError(
+                                    404,
+                                    "Unknown device",
+                                    Codes.NOT_FOUND
+                                ))
+                                for device in devicemap.keys()
+                            }
+                            # don't fall through and store a signature after
+                            # we've just recorded failures for this user
+                            continue
+
+                        key = devicemap[device_id]
+
+                        if user_signing_key_id in stored_key.get("signatures", {}) \
+                                .get(user_id, {}):
+                            # we already have the signature, so we can skip it
+                            continue
+
+                        _check_device_signature(
+                            user_id, user_signing_verify_key, key, stored_key
+                        )
+
+                        signature = key["signatures"][user_id][user_signing_key_id]
+
+                        signed_users.append(user)
+                        signature_list.append(
+                            (user_signing_key_id, user, device_id, signature)
+                        )
+                    except SynapseError as e:
+                        if device_id is None:
+                            failures[user] = {
+                                device_id: _exception_to_failure(e)
+                                for device_id in devicemap.keys()
+                            }
+                        else:
+                            failures.setdefault(user, {})[device_id] \
+                                = _exception_to_failure(e)
+
+        # store the signature, and send the appropriate notifications for sync
+        logger.debug("upload signature failures: %r", failures)
+        yield self.store.store_e2e_device_signatures(user_id, signature_list)
+
+        if len(self_device_ids):
+            yield self.device_handler.notify_device_update(user_id, self_device_ids)
+        if len(signed_users):
+            yield self.device_handler.notify_user_signature_update(user_id, signed_users)
+
+        defer.returnValue({"failures": failures})
 
 def _check_cross_signing_key(key, user_id, key_type, signing_key=None):
     """Check a cross-signing key uploaded by a user. Performs some basic sanity
@@ -636,6 +824,68 @@ def _check_cross_signing_key(key, user_id, key_type, signing_key=None):
     )
 
 
+def _check_device_signature(user_id, verify_key, signed_device, stored_device):
+    """Check that a device signature is correct and matches the copy of the device
+    that we have. Throws an exception if an error is detected.
+
+    Args:
+        user_id (str): the user ID whose signature is being checked
+        verify_key (VerifyKey): the key to verify the device with
+        signed_device (dict): the signed device data
+        stored_device (dict): our previous copy of the device
+    """
+
+    key_id = "%s:%s" % (verify_key.alg, verify_key.version)
+
+    # make sure the device is signed
+    if ("signatures" not in signed_device or user_id not in signed_device["signatures"]
+            or key_id not in signed_device["signatures"][user_id]):
+        logger.error("upload signature: user not found in signatures")
+        raise SynapseError(
+            400,
+            "Invalid signature",
+            Codes.INVALID_SIGNATURE
+        )
+
+    signature = signed_device["signatures"][user_id][key_id]
+
+    # make sure that the device submitted matches what we have stored
+    del signed_device["signatures"]
+    if "unsigned" in signed_device:
+        del signed_device["unsigned"]
+    if "signatures" in stored_device:
+        del stored_device["signatures"]
+    if "unsigned" in stored_device:
+        del stored_device["unsigned"]
+    if signed_device != stored_device:
+        logger.error(
+            "upload signatures: key does not match %s vs %s",
+            signed_device, stored_device
+        )
+        raise SynapseError(
+            400,
+            "Key does not match",
+            "M_MISMATCHED_KEY"
+        )
+
+    # check the signature
+    signed_device["signatures"] = {
+        user_id: {
+            key_id: signature
+        }
+    }
+
+    try:
+        verify_signed_json(signed_device, user_id, verify_key)
+    except SignatureVerifyException:
+        logger.error("invalid signature on key")
+        raise SynapseError(
+            400,
+            "Invalid signature",
+            Codes.INVALID_SIGNATURE
+        )
+
+
 def _exception_to_failure(e):
     if isinstance(e, CodeMessageException):
         return {"status": e.code, "message": str(e)}
diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py
index 151a70d449..5c288d48b7 100644
--- a/synapse/rest/client/v2_alpha/keys.py
+++ b/synapse/rest/client/v2_alpha/keys.py
@@ -277,9 +277,59 @@ class SigningKeyUploadServlet(RestServlet):
         return (200, result)
 
 
+class SignaturesUploadServlet(RestServlet):
+    """
+    POST /keys/signatures/upload HTTP/1.1
+    Content-Type: application/json
+
+    {
+      "@alice:example.com": {
+        "<device_id>": {
+          "user_id": "<user_id>",
+          "device_id": "<device_id>",
+          "algorithms": [
+            "m.olm.curve25519-aes-sha256",
+            "m.megolm.v1.aes-sha"
+          ],
+          "keys": {
+            "<algorithm>:<device_id>": "<key_base64>",
+          },
+          "signatures": {
+            "<signing_user_id>": {
+              "<algorithm>:<signing_key_base64>": "<signature_base64>"
+            }
+          }
+        }
+      }
+    }
+    """
+    PATTERNS = client_v2_patterns("/keys/signatures/upload$")
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs (synapse.server.HomeServer): server
+        """
+        super(SignaturesUploadServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.e2e_keys_handler = hs.get_e2e_keys_handler()
+
+    @defer.inlineCallbacks
+    def on_POST(self, request):
+        requester = yield self.auth.get_user_by_req(request, allow_guest=True)
+        user_id = requester.user.to_string()
+        body = parse_json_object_from_request(request)
+
+        result = yield self.e2e_keys_handler.upload_signatures_for_device_keys(
+            user_id, body
+        )
+        defer.returnValue((200, result))
+
+
 def register_servlets(hs, http_server):
     KeyUploadServlet(hs).register(http_server)
     KeyQueryServlet(hs).register(http_server)
     KeyChangesServlet(hs).register(http_server)
     OneTimeKeyServlet(hs).register(http_server)
     SigningKeyUploadServlet(hs).register(http_server)
+    SignaturesUploadServlet(hs).register(http_server)
diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index 8ce5dd8bf9..fe786f3093 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -59,6 +59,12 @@ class EndToEndKeyWorkerStore(SQLBaseStore):
+ + Args: + user_id (str): the user ID whose signature is being checked + verify_key (VerifyKey): the key to verify the device with + signed_device (dict): the signed device data + stored_device (dict): our previous copy of the device + """ + + key_id = "%s:%s" % (verify_key.alg, verify_key.version) + + # make sure the device is signed + if ("signatures" not in signed_device or user_id not in signed_device["signatures"] + or key_id not in signed_device["signatures"][user_id]): + logger.error("upload signature: user not found in signatures") + raise SynapseError( + 400, + "Invalid signature", + Codes.INVALID_SIGNATURE + ) + + signature = signed_device["signatures"][user_id][key_id] + + # make sure that the device submitted matches what we have stored + del signed_device["signatures"] + if "unsigned" in signed_device: + del signed_device["unsigned"] + if "signatures" in stored_device: + del stored_device["signatures"] + if "unsigned" in stored_device: + del stored_device["unsigned"] + if signed_device != stored_device: + logger.error( + "upload signatures: key does not match %s vs %s", + signed_device, stored_device + ) + raise SynapseError( + 400, + "Key does not match", + "M_MISMATCHED_KEY" + ) + + # check the signature + signed_device["signatures"] = { + user_id: { + key_id: signature + } + } + + try: + verify_signed_json(signed_device, user_id, verify_key) + except SignatureVerifyException: + logger.error("invalid signature on key") + raise SynapseError( + 400, + "Invalid signature", + Codes.INVALID_SIGNATURE + ) + + def _exception_to_failure(e): if isinstance(e, CodeMessageException): return {"status": e.code, "message": str(e)} diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 151a70d449..5c288d48b7 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -277,9 +277,59 @@ class SigningKeyUploadServlet(RestServlet): return (200, result) +class SignaturesUploadServlet(RestServlet): + """ + POST /keys/signatures/upload HTTP/1.1 + Content-Type: application/json + + { + "@alice:example.com": { + "": { + "user_id": "", + "device_id": "", + "algorithms": [ + "m.olm.curve25519-aes-sha256", + "m.megolm.v1.aes-sha" + ], + "keys": { + ":": "", + }, + "signatures": { + "": { + ":": ">" + } + } + } + } + } + """ + PATTERNS = client_v2_patterns("/keys/signatures/upload$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(SignaturesUploadServlet, self).__init__() + self.auth = hs.get_auth() + self.e2e_keys_handler = hs.get_e2e_keys_handler() + + @defer.inlineCallbacks + def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + user_id = requester.user.to_string() + body = parse_json_object_from_request(request) + + result = yield self.e2e_keys_handler.upload_signatures_for_device_keys( + user_id, body + ) + defer.returnValue((200, result)) + + def register_servlets(hs, http_server): KeyUploadServlet(hs).register(http_server) KeyQueryServlet(hs).register(http_server) KeyChangesServlet(hs).register(http_server) OneTimeKeyServlet(hs).register(http_server) SigningKeyUploadServlet(hs).register(http_server) + SignaturesUploadServlet(hs).register(http_server) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 8ce5dd8bf9..fe786f3093 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -59,6 +59,12 @@ class EndToEndKeyWorkerStore(SQLBaseStore): for user_id, 
device_keys in iteritems(results): for device_id, device_info in iteritems(device_keys): device_info["keys"] = db_to_json(device_info.pop("key_json")) + # add cross-signing signatures to the keys + if "signatures" in device_info: + for sig_user_id, sigs in device_info["signatures"].items(): + device_info["keys"].setdefault("signatures", {}) \ + .setdefault(sig_user_id, {}) \ + .update(sigs) return results @@ -71,6 +77,8 @@ class EndToEndKeyWorkerStore(SQLBaseStore): query_clauses = [] query_params = [] + signature_query_clauses = [] + signature_query_params = [] if include_all_devices is False: include_deleted_devices = False @@ -81,12 +89,20 @@ class EndToEndKeyWorkerStore(SQLBaseStore): for (user_id, device_id) in query_list: query_clause = "user_id = ?" query_params.append(user_id) + signature_query_clause = "target_user_id = ?" + signature_query_params.append(user_id) if device_id is not None: query_clause += " AND device_id = ?" query_params.append(device_id) + signature_query_clause += " AND target_device_id = ?" + signature_query_params.append(device_id) + + signature_query_clause += " AND user_id = ?" + signature_query_params.append(user_id) query_clauses.append(query_clause) + signature_query_clauses.append(signature_query_clause) sql = ( "SELECT user_id, device_id, " @@ -113,6 +129,28 @@ class EndToEndKeyWorkerStore(SQLBaseStore): for user_id, device_id in deleted_devices: result.setdefault(user_id, {})[device_id] = None + # get signatures on the device + signature_sql = ( + "SELECT * " + " FROM e2e_device_signatures " + " WHERE %s" + ) % ( + " OR ".join("(" + q + ")" for q in signature_query_clauses) + ) + + txn.execute(signature_sql, signature_query_params) + rows = self.cursor_to_dict(txn) + + for row in rows: + target_user_id = row["target_user_id"] + target_device_id = row["target_device_id"] + if target_user_id in result \ + and target_device_id in result[target_user_id]: + result[target_user_id][target_device_id] \ + .setdefault("signatures", {}) \ + .setdefault(row["user_id"], {})[row["key_id"]] \ + = row["signature"] + log_kv(result) return result -- cgit 1.4.1 From ac4746ac4bb4d9371c5a25e94ecccd83effb8b9a Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 17 Jul 2019 22:11:31 -0400 Subject: allow uploading signatures of master key signed by devices --- synapse/handlers/e2e_keys.py | 232 ++++++++++++++++++++++------------- synapse/rest/client/v2_alpha/keys.py | 2 +- synapse/storage/end_to_end_keys.py | 2 +- tests/handlers/test_e2e_keys.py | 227 +++++++++++++++++++++++++++++++++- 4 files changed, 378 insertions(+), 85 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 9747b517ff..1148803c1e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -20,7 +20,9 @@ import logging from six import iteritems from canonicaljson import encode_canonical_json, json +from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json +from unpaddedbase64 import decode_base64 from twisted.internet import defer @@ -619,8 +621,11 @@ class E2eKeysHandler(object): """ failures = {} - signature_list = [] # signatures to be stored - self_device_ids = [] # what devices have been updated, for notifying + # signatures to be stored. 
Each item will be a tuple of + # (signing_key_id, target_user_id, target_device_id, signature) + signature_list = [] + # what devices have been updated, for notifying + self_device_ids = [] # split between checking signatures for own user and signatures for # other users, since we verify them with different keys @@ -630,46 +635,107 @@ self_device_ids = list(self_signatures.keys()) try: # get our self-signing key to verify the signatures - self_signing_key = yield self.store.get_e2e_cross_signing_key( - user_id, "self_signing" - ) - if self_signing_key is None: - raise SynapseError( - 404, - "No self-signing key found", - Codes.NOT_FOUND + self_signing_key, self_signing_key_id, self_signing_verify_key \ = yield self._get_e2e_cross_signing_verify_key( + user_id, "self_signing" ) - self_signing_key_id, self_signing_verify_key \ - = get_verify_key_from_cross_signing_key(self_signing_key) + # get our master key, since it may be signed + master_key, master_key_id, master_verify_key \ = yield self._get_e2e_cross_signing_verify_key( + user_id, "master" + ) - # fetch our stored devices so that we can compare with what was sent - user_devices = [] - for device in self_signatures.keys(): - user_devices.append((user_id, device)) - devices = yield self.store.get_e2e_device_keys(user_devices) + # fetch our stored devices. This is used to 1. verify + # signatures on the master key, and 2. to compare with what + # was sent if the device was signed devices = yield self.store.get_e2e_device_keys([(user_id, None)]) if user_id not in devices: raise SynapseError( - 404, - "No device key found", - Codes.NOT_FOUND + 404, "No device keys found", Codes.NOT_FOUND ) devices = devices[user_id] for device_id, device in self_signatures.items(): try: if ("signatures" not in device or - user_id not in device["signatures"] or - self_signing_key_id not in device["signatures"][user_id]): + user_id not in device["signatures"]): + # no signature was sent + raise SynapseError( + 400, "Invalid signature", Codes.INVALID_SIGNATURE + ) + + if device_id == master_verify_key.version: + # we have master key signed by devices: for each + # device that signed, check the signature. Since + # the "failures" property in the response only has + # granularity up to the signed device, either all + # of the signatures on the master key succeed, or + # all fail. So loop over the signatures and add + # them to a separate signature list. If everything + # works out, then add them all to the main + # signature list. (In practice, we're likely to + # only have one signature anyways.)
+ master_key_signature_list = [] + for signing_key_id, signature in device["signatures"][user_id].items(): + alg, signing_device_id = signing_key_id.split(":", 1) + if (signing_device_id not in devices or + signing_key_id not in + devices[signing_device_id]["keys"]["keys"]): + # signed by an unknown device, or the + # device does not have the key + raise SynapseError( + 400, "Invalid signature", Codes.INVALID_SIGNATURE + ) + + sigs = device["signatures"] + del device["signatures"] + # use pop to avoid exception if key doesn't exist + device.pop("unsigned", None) + master_key.pop("signature", None) + master_key.pop("unsigned", None) + + if master_key != device: + raise SynapseError( + 400, "Key does not match" + ) + + # get the key and check the signature + pubkey = devices[signing_device_id]["keys"]["keys"][signing_key_id] + verify_key = decode_verify_key_bytes( + signing_key_id, decode_base64(pubkey) + ) + device["signatures"] = sigs + try: + verify_signed_json(device, user_id, verify_key) + except SignatureVerifyException: + raise SynapseError( + 400, "Invalid signature", Codes.INVALID_SIGNATURE + ) + + master_key_signature_list.append( + (signing_key_id, user_id, device_id, signature) + ) + + signature_list.extend(master_key_signature_list) + continue + + # at this point, we have a device that should be signed + # by the self-signing key + if self_signing_key_id not in device["signatures"][user_id]: # no signature was sent raise SynapseError( - 400, - "Invalid signature", - Codes.INVALID_SIGNATURE + 400, "Invalid signature", Codes.INVALID_SIGNATURE ) - stored_device = devices[device_id]["keys"] + stored_device = None + try: + stored_device = devices[device_id]["keys"] + except KeyError: + raise SynapseError( + 404, "Unknown device", Codes.NOT_FOUND + ) if self_signing_key_id in stored_device.get("signatures", {}) \ .get(user_id, {}): # we already have a signature on this device, so we @@ -698,69 +764,50 @@ class E2eKeysHandler(object): # if signatures isn't empty, then we have signatures for other # users. 
These signatures will be signed by the user signing key - # get our user-signing key to verify the signatures - user_signing_key = yield self.store.get_e2e_cross_signing_key( - user_id, "user_signing" - ) - if user_signing_key is None: - for user, devicemap in signatures.items(): - failures[user] = { - device: _exception_to_failure(SynapseError( - 404, - "No user-signing key found", - Codes.NOT_FOUND - )) - for device in devicemap.keys() - } - else: - user_signing_key_id, user_signing_verify_key \ - = get_verify_key_from_cross_signing_key(user_signing_key) + try: + # get our user-signing key to verify the signatures + user_signing_key, user_signing_key_id, user_signing_verify_key \ + = yield self._get_e2e_cross_signing_verify_key( + user_id, "user_signing" + ) for user, devicemap in signatures.items(): device_id = None try: # get the user's master key, to make sure it matches # what was sent - stored_key = yield self.store.get_e2e_cross_signing_key( - user, "master", user_id - ) - if stored_key is None: - logger.error( - "upload signature: no user key found for %s", user - ) - raise SynapseError( - 404, - "User's master key not found", - Codes.NOT_FOUND + stored_key, stored_key_id, _ \ + = yield self._get_e2e_cross_signing_verify_key( + user, "master", user_id ) # make sure that the user's master key is the one that # was signed (and no others) - device_id = get_verify_key_from_cross_signing_key(stored_key)[0] \ - .split(":", 1)[1] + device_id = stored_key_id.split(":", 1)[1] if device_id not in devicemap: + # set device to None so that the failure gets + # marked on all the signatures + device_id = None logger.error( "upload signature: wrong device: %s vs %s", device, devicemap ) raise SynapseError( - 404, - "Unknown device", - Codes.NOT_FOUND + 404, "Unknown device", Codes.NOT_FOUND ) - if len(devicemap) > 1: + key = devicemap[device_id] + del devicemap[device_id] + if len(devicemap) > 0: + # other devices were signed -- mark those as failures logger.error("upload signature: too many devices specified") + failure = _exception_to_failure(SynapseError( + 404, "Unknown device", Codes.NOT_FOUND + )) failures[user] = { - device: _exception_to_failure(SynapseError( - 404, - "Unknown device", - Codes.NOT_FOUND - )) + device: failure for device in devicemap.keys() } - key = devicemap[device_id] - if user_signing_key_id in stored_key.get("signatures", {}) \ .get(user_id, {}): # we already have the signature, so we can skip it @@ -770,25 +817,31 @@ class E2eKeysHandler(object): user_id, user_signing_verify_key, key, stored_key ) - signature = key["signatures"][user_id][user_signing_key_id] - signed_users.append(user) + signature = key["signatures"][user_id][user_signing_key_id] signature_list.append( (user_signing_key_id, user, device_id, signature) ) except SynapseError as e: + failure = _exception_to_failure(e) if device_id is None: failures[user] = { - device_id: _exception_to_failure(e) + device_id: failure for device_id in devicemap.keys() } else: - failures.setdefault(user, {})[device_id] \ - = _exception_to_failure(e) + failures.setdefault(user, {})[device_id] = failure + except SynapseError as e: + failure = _exception_to_failure(e) + for user, devicemap in signature.items(): + failures[user] = { + device_id: failure + for device_id in devicemap.keys() + } # store the signature, and send the appropriate notifications for sync logger.debug("upload signature failures: %r", failures) - yield self.store.store_e2e_device_signatures(user_id, signature_list) + yield 
self.store.store_e2e_cross_signing_signatures(user_id, signature_list) if len(self_device_ids): yield self.device_handler.notify_device_update(user_id, self_device_ids) @@ -797,6 +850,22 @@ class E2eKeysHandler(object): defer.returnValue({"failures": failures}) + @defer.inlineCallbacks + def _get_e2e_cross_signing_verify_key(self, user_id, key_type, from_user_id=None): + key = yield self.store.get_e2e_cross_signing_key( + user_id, key_type, from_user_id + ) + if key is None: + logger.error("no %s key found for %s", key_type, user_id) + raise SynapseError( + 404, + "No %s key found for %s" % (key_type, user_id), + Codes.NOT_FOUND + ) + key_id, verify_key = get_verify_key_from_cross_signing_key(key) + return key, key_id, verify_key + + def _check_cross_signing_key(key, user_id, key_type, signing_key=None): """Check a cross-signing key uploaded by a user. Performs some basic sanity checking, and ensures that it is signed, if a signature is required. @@ -851,21 +920,17 @@ def _check_device_signature(user_id, verify_key, signed_device, stored_device): # make sure that the device submitted matches what we have stored del signed_device["signatures"] - if "unsigned" in signed_device: - del signed_device["unsigned"] - if "signatures" in stored_device: - del stored_device["signatures"] - if "unsigned" in stored_device: - del stored_device["unsigned"] + # use pop to avoid exception if key doesn't exist + signed_device.pop("unsigned", None) + stored_device.pop("signatures", None) + stored_device.pop("unsigned", None) if signed_device != stored_device: logger.error( "upload signatures: key does not match %s vs %s", signed_device, stored_device ) raise SynapseError( - 400, - "Key does not match", - "M_MISMATCHED_KEY" + 400, "Key does not match", ) # check the signature @@ -887,6 +952,9 @@ def _check_device_signature(user_id, verify_key, signed_device, stored_device): def _exception_to_failure(e): + if isinstance(e, SynapseError): + return {"status": e.code, "errcode": e.errcode, "message": str(e)} + if isinstance(e, CodeMessageException): return {"status": e.code, "message": str(e)} diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 5c288d48b7..cb3c52cb8e 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -303,7 +303,7 @@ class SignaturesUploadServlet(RestServlet): } } """ - PATTERNS = client_v2_patterns("/keys/signatures/upload$") + PATTERNS = client_patterns("/keys/signatures/upload$") def __init__(self, hs): """ diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index fe786f3093..e68ce318af 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -132,7 +132,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): # get signatures on the device signature_sql = ( "SELECT * " - " FROM e2e_device_signatures " + " FROM e2e_cross_signing_signatures " " WHERE %s" ) % ( " OR ".join("(" + q + ")" for q in signature_query_clauses) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index a62c52eefa..b1d3a4cfae 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -17,9 +17,10 @@ import mock +import signedjson.key as key +import signedjson.sign as sign from twisted.internet import defer -import synapse.api.errors import synapse.handlers.e2e_keys import synapse.storage from synapse.api import errors @@ -210,3 +211,227 @@ class E2eKeysHandlerTestCase(unittest.TestCase): res = yield 
self.handler.query_local_devices({local_user: None}) self.assertDictEqual(res, {local_user: {}}) + + @defer.inlineCallbacks + def test_upload_signatures(self): + """should check signatures that are uploaded""" + # set up a user with cross-signing keys and a device. This user will + # try uploading signatures + local_user = "@boris:" + self.hs.hostname + device_id = "xyz" + # private key: OMkooTr76ega06xNvXIGPbgvvxAOzmQncN8VObS7aBA + device_pubkey = "NnHhnqiMFQkq969szYkooLaBAXW244ZOxgukCvm2ZeY" + device_key = { + "user_id": local_user, + "device_id": device_id, + "algorithms": ["m.olm.curve25519-aes-sha256", "m.megolm.v1.aes-sha"], + "keys": { + "curve25519:xyz": "curve25519+key", + "ed25519:xyz": device_pubkey + }, + "signatures": { + local_user: { + "ed25519:xyz": "something" + } + } + } + device_signing_key = key.decode_signing_key_base64( + "ed25519", + "xyz", + "OMkooTr76ega06xNvXIGPbgvvxAOzmQncN8VObS7aBA" + ) + + yield self.handler.upload_keys_for_user( + local_user, device_id, {"device_keys": device_key} + ) + + # private key: 2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0 + master_pubkey = "nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk" + master_key = { + "user_id": local_user, + "usage": ["master"], + "keys": { + "ed25519:" + master_pubkey: master_pubkey + } + } + master_signing_key = key.decode_signing_key_base64( + "ed25519", master_pubkey, + "2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0" + ) + usersigning_pubkey = "Hq6gL+utB4ET+UvD5ci0kgAwsX6qP/zvf8v6OInU5iw" + usersigning_key = { + # private key: 4TL4AjRYwDVwD3pqQzcor+ez/euOB1/q78aTJ+czDNs + "user_id": local_user, + "usage": ["user_signing"], + "keys": { + "ed25519:" + usersigning_pubkey: usersigning_pubkey, + } + } + usersigning_signing_key = key.decode_signing_key_base64( + "ed25519", usersigning_pubkey, + "4TL4AjRYwDVwD3pqQzcor+ez/euOB1/q78aTJ+czDNs" + ) + sign.sign_json(usersigning_key, local_user, master_signing_key) + # private key: HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8 + selfsigning_pubkey = "EmkqvokUn8p+vQAGZitOk4PWjp7Ukp3txV2TbMPEiBQ" + selfsigning_key = { + "user_id": local_user, + "usage": ["self_signing"], + "keys": { + "ed25519:" + selfsigning_pubkey: selfsigning_pubkey, + } + } + selfsigning_signing_key = key.decode_signing_key_base64( + "ed25519", selfsigning_pubkey, + "HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8" + ) + sign.sign_json(selfsigning_key, local_user, master_signing_key) + cross_signing_keys = { + "master_key": master_key, + "user_signing_key": usersigning_key, + "self_signing_key": selfsigning_key, + } + yield self.handler.upload_signing_keys_for_user(local_user, cross_signing_keys) + + # set up another user with a master key. 
This user will be signed by + # the first user + other_user = "@otherboris:" + self.hs.hostname + other_master_pubkey = "fHZ3NPiKxoLQm5OoZbKa99SYxprOjNs4TwJUKP+twCM" + other_master_key = { + # private key: oyw2ZUx0O4GifbfFYM0nQvj9CL0b8B7cyN4FprtK8OI + "user_id": other_user, + "usage": ["master"], + "keys": { + "ed25519:" + other_master_pubkey: other_master_pubkey + } + } + yield self.handler.upload_signing_keys_for_user(other_user, { + "master_key": other_master_key + }) + + # test various signature failures (see below) + ret = yield self.handler.upload_signatures_for_device_keys( + local_user, + { + local_user: { + # fails because the signature is invalid + # should fail with INVALID_SIGNATURE + device_id: { + "user_id": local_user, + "device_id": device_id, + "algorithms": ["m.olm.curve25519-aes-sha256", "m.megolm.v1.aes-sha"], + "keys": { + "curve25519:xyz": "curve25519+key", + # private key: OMkooTr76ega06xNvXIGPbgvvxAOzmQncN8VObS7aBA + "ed25519:xyz": device_pubkey + }, + "signatures": { + local_user: { + "ed25519:" + selfsigning_pubkey: "something", + } + } + }, + # fails because device is unknown + # should fail with NOT_FOUND + "unknown": { + "user_id": local_user, + "device_id": "unknown", + "signatures": { + local_user: { + "ed25519:" + selfsigning_pubkey: "something", + } + } + }, + # fails because the signature is invalid + # should fail with INVALID_SIGNATURE + master_pubkey: { + "user_id": local_user, + "usage": ["master"], + "keys": { + "ed25519:" + master_pubkey: master_pubkey + }, + "signatures": { + local_user: { + "ed25519:" + device_pubkey: "something", + } + } + } + }, + other_user: { + # fails because the device is not the user's master-signing key + # should fail with NOT_FOUND + "unknown": { + "user_id": other_user, + "device_id": "unknown", + "signatures": { + local_user: { + "ed25519:" + usersigning_pubkey: "something", + } + } + }, + other_master_pubkey: { + # fails because the key doesn't match what the server has + # should fail with UNKNOWN + "user_id": other_user, + "usage": ["master"], + "keys": { + "ed25519:" + other_master_pubkey: other_master_pubkey + }, + "something": "random", + "signatures": { + local_user: { + "ed25519:" + usersigning_pubkey: "something", + } + } + } + } + } + ) + + user_failures = ret["failures"][local_user] + self.assertEqual(user_failures[device_id]["errcode"], errors.Codes.INVALID_SIGNATURE) + self.assertEqual(user_failures[master_pubkey]["errcode"], errors.Codes.INVALID_SIGNATURE) + self.assertEqual(user_failures["unknown"]["errcode"], errors.Codes.NOT_FOUND) + + other_user_failures = ret["failures"][other_user] + self.assertEqual(other_user_failures["unknown"]["errcode"], errors.Codes.NOT_FOUND) + self.assertEqual(other_user_failures[other_master_pubkey]["errcode"], errors.Codes.UNKNOWN) + + # test successful signatures + del device_key["signatures"] + sign.sign_json(device_key, local_user, selfsigning_signing_key) + sign.sign_json(master_key, local_user, device_signing_key) + sign.sign_json(other_master_key, local_user, usersigning_signing_key) + ret = yield self.handler.upload_signatures_for_device_keys( + local_user, + { + local_user: { + device_id: device_key, + master_pubkey: master_key + }, + other_user: { + other_master_pubkey: other_master_key + } + } + ) + + self.assertEqual(ret["failures"], {}) + + # fetch the signed keys/devices and make sure that the signatures are there + ret = yield self.handler.query_devices( + {"device_keys": {local_user: [], other_user: []}}, + 0, local_user + ) + + self.assertEqual( + 
ret["device_keys"][local_user]["xyz"]["signatures"][local_user]["ed25519:" + selfsigning_pubkey], + device_key["signatures"][local_user]["ed25519:" + selfsigning_pubkey] + ) + self.assertEqual( + ret["master_keys"][local_user]["signatures"][local_user]["ed25519:" + device_id], + master_key["signatures"][local_user]["ed25519:" + device_id] + ) + self.assertEqual( + ret["master_keys"][other_user]["signatures"][local_user]["ed25519:" + usersigning_pubkey], + other_master_key["signatures"][local_user]["ed25519:" + usersigning_pubkey] + ) -- cgit 1.4.1 From 7d6c70fc7ad08b94b8b577c537953a8d9b568562 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Mon, 22 Jul 2019 12:52:39 -0400 Subject: make black happy --- synapse/handlers/e2e_keys.py | 147 ++++++++++++++++------------------- synapse/rest/client/v2_alpha/keys.py | 1 + synapse/storage/end_to_end_keys.py | 24 +++--- tests/handlers/test_e2e_keys.py | 147 +++++++++++++++--------------------- 4 files changed, 141 insertions(+), 178 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 1148803c1e..74bceddc46 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -635,16 +635,14 @@ class E2eKeysHandler(object): self_device_ids = list(self_signatures.keys()) try: # get our self-signing key to verify the signatures - self_signing_key, self_signing_key_id, self_signing_verify_key \ - = yield self._get_e2e_cross_signing_verify_key( - user_id, "self_signing" - ) + self_signing_key, self_signing_key_id, self_signing_verify_key = yield self._get_e2e_cross_signing_verify_key( + user_id, "self_signing" + ) # get our master key, since it may be signed - master_key, master_key_id, master_verify_key \ - = yield self._get_e2e_cross_signing_verify_key( - user_id, "master" - ) + master_key, master_key_id, master_verify_key = yield self._get_e2e_cross_signing_verify_key( + user_id, "master" + ) # fetch our stored devices. This is used to 1. verify # signatures on the master key, and 2. to compare with what # was sent if the device was signed devices = yield self.store.get_e2e_device_keys([(user_id, None)]) if user_id not in devices: - raise SynapseError( - 404, "No device keys found", Codes.NOT_FOUND - ) + raise SynapseError(404, "No device keys found", Codes.NOT_FOUND) devices = devices[user_id] for device_id, device in self_signatures.items(): try: - if ("signatures" not in device or - user_id not in device["signatures"]): + if ( + "signatures" not in device + or user_id not in device["signatures"] + ): # no signature was sent raise SynapseError( 400, "Invalid signature", Codes.INVALID_SIGNATURE @@ -678,15 +676,21 @@ class E2eKeysHandler(object): # signature list. (In practice, we're likely to # only have one signature anyways.)
master_key_signature_list = [] - for signing_key_id, signature in device["signatures"][user_id].items(): + for signing_key_id, signature in device["signatures"][ + user_id + ].items(): alg, signing_device_id = signing_key_id.split(":", 1) - if (signing_device_id not in devices or - signing_key_id not in - devices[signing_device_id]["keys"]["keys"]): + if ( + signing_device_id not in devices + or signing_key_id + not in devices[signing_device_id]["keys"]["keys"] + ): # signed by an unknown device, or the # device does not have the key raise SynapseError( - 400, "Invalid signature", Codes.INVALID_SIGNATURE + 400, + "Invalid signature", + Codes.INVALID_SIGNATURE, ) sigs = device["signatures"] @@ -697,12 +701,12 @@ class E2eKeysHandler(object): master_key.pop("unsigned", None) if master_key != device: - raise SynapseError( - 400, "Key does not match" - ) + raise SynapseError(400, "Key does not match") # get the key and check the signature - pubkey = devices[signing_device_id]["keys"]["keys"][signing_key_id] + pubkey = devices[signing_device_id]["keys"]["keys"][ + signing_key_id + ] verify_key = decode_verify_key_bytes( signing_key_id, decode_base64(pubkey) ) @@ -711,7 +715,9 @@ class E2eKeysHandler(object): verify_signed_json(device, user_id, verify_key) except SignatureVerifyException: raise SynapseError( - 400, "Invalid signature", Codes.INVALID_SIGNATURE + 400, + "Invalid signature", + Codes.INVALID_SIGNATURE, ) master_key_signature_list.append( @@ -733,11 +739,10 @@ class E2eKeysHandler(object): try: stored_device = devices[device_id]["keys"] except KeyError: - raise SynapseError( - 404, "Unknown device", Codes.NOT_FOUND - ) - if self_signing_key_id in stored_device.get("signatures", {}) \ - .get(user_id, {}): + raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) + if self_signing_key_id in stored_device.get( + "signatures", {} + ).get(user_id, {}): # we already have a signature on this device, so we # can skip it, since it should be exactly the same continue @@ -751,8 +756,9 @@ class E2eKeysHandler(object): (self_signing_key_id, user_id, device_id, signature) ) except SynapseError as e: - failures.setdefault(user_id, {})[device_id] \ - = _exception_to_failure(e) + failures.setdefault(user_id, {})[ + device_id + ] = _exception_to_failure(e) except SynapseError as e: failures[user_id] = { device: _exception_to_failure(e) @@ -766,20 +772,18 @@ class E2eKeysHandler(object): try: # get our user-signing key to verify the signatures - user_signing_key, user_signing_key_id, user_signing_verify_key \ - = yield self._get_e2e_cross_signing_verify_key( - user_id, "user_signing" - ) + user_signing_key, user_signing_key_id, user_signing_verify_key = yield self._get_e2e_cross_signing_verify_key( + user_id, "user_signing" + ) for user, devicemap in signatures.items(): device_id = None try: # get the user's master key, to make sure it matches # what was sent - stored_key, stored_key_id, _ \ - = yield self._get_e2e_cross_signing_verify_key( - user, "master", user_id - ) + stored_key, stored_key_id, _ = yield self._get_e2e_cross_signing_verify_key( + user, "master", user_id + ) # make sure that the user's master key is the one that # was signed (and no others) @@ -790,26 +794,25 @@ class E2eKeysHandler(object): device_id = None logger.error( "upload signature: wrong device: %s vs %s", - device, devicemap - ) - raise SynapseError( - 404, "Unknown device", Codes.NOT_FOUND + device, + devicemap, ) + raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) key = devicemap[device_id] del 
devicemap[device_id] if len(devicemap) > 0: # other devices were signed -- mark those as failures logger.error("upload signature: too many devices specified") - failure = _exception_to_failure(SynapseError( - 404, "Unknown device", Codes.NOT_FOUND - )) + failure = _exception_to_failure( + SynapseError(404, "Unknown device", Codes.NOT_FOUND) + ) failures[user] = { - device: failure - for device in devicemap.keys() + device: failure for device in devicemap.keys() } - if user_signing_key_id in stored_key.get("signatures", {}) \ - .get(user_id, {}): + if user_signing_key_id in stored_key.get("signatures", {}).get( + user_id, {} + ): # we already have the signature, so we can skip it continue @@ -826,8 +829,7 @@ class E2eKeysHandler(object): failure = _exception_to_failure(e) if device_id is None: failures[user] = { - device_id: failure - for device_id in devicemap.keys() + device_id: failure for device_id in devicemap.keys() } else: failures.setdefault(user, {})[device_id] = failure @@ -835,8 +837,7 @@ class E2eKeysHandler(object): failure = _exception_to_failure(e) for user, devicemap in signature.items(): failures[user] = { - device_id: failure - for device_id in devicemap.keys() + device_id: failure for device_id in devicemap.keys() } # store the signature, and send the appropriate notifications for sync @@ -846,7 +847,9 @@ class E2eKeysHandler(object): if len(self_device_ids): yield self.device_handler.notify_device_update(user_id, self_device_ids) if len(signed_users): - yield self.device_handler.notify_user_signature_update(user_id, signed_users) + yield self.device_handler.notify_user_signature_update( + user_id, signed_users + ) defer.returnValue({"failures": failures}) @@ -858,9 +861,7 @@ class E2eKeysHandler(object): if key is None: logger.error("no %s key found for %s", key_type, user_id) raise SynapseError( - 404, - "No %s key found for %s" % (key_type, user_id), - Codes.NOT_FOUND + 404, "No %s key found for %s" % (key_type, user_id), Codes.NOT_FOUND ) key_id, verify_key = get_verify_key_from_cross_signing_key(key) return key, key_id, verify_key @@ -907,14 +908,13 @@ def _check_device_signature(user_id, verify_key, signed_device, stored_device): key_id = "%s:%s" % (verify_key.alg, verify_key.version) # make sure the device is signed - if ("signatures" not in signed_device or user_id not in signed_device["signatures"] - or key_id not in signed_device["signatures"][user_id]): + if ( + "signatures" not in signed_device + or user_id not in signed_device["signatures"] + or key_id not in signed_device["signatures"][user_id] + ): logger.error("upload signature: user not found in signatures") - raise SynapseError( - 400, - "Invalid signature", - Codes.INVALID_SIGNATURE - ) + raise SynapseError(400, "Invalid signature", Codes.INVALID_SIGNATURE) signature = signed_device["signatures"][user_id][key_id] @@ -927,28 +927,19 @@ def _check_device_signature(user_id, verify_key, signed_device, stored_device): if signed_device != stored_device: logger.error( "upload signatures: key does not match %s vs %s", - signed_device, stored_device - ) - raise SynapseError( - 400, "Key does not match", + signed_device, + stored_device, ) + raise SynapseError(400, "Key does not match") # check the signature - signed_device["signatures"] = { - user_id: { - key_id: signature - } - } + signed_device["signatures"] = {user_id: {key_id: signature}} try: verify_signed_json(signed_device, user_id, verify_key) except SignatureVerifyException: logger.error("invalid signature on key") - raise SynapseError( - 400, - 
"Invalid signature", - Codes.INVALID_SIGNATURE - ) + raise SynapseError(400, "Invalid signature", Codes.INVALID_SIGNATURE) def _exception_to_failure(e): diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index cb3c52cb8e..a205281830 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -303,6 +303,7 @@ class SignaturesUploadServlet(RestServlet): } } """ + PATTERNS = client_patterns("/keys/signatures/upload$") def __init__(self, hs): diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index e68ce318af..258e8dcb47 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -62,9 +62,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): # add cross-signing signatures to the keys if "signatures" in device_info: for sig_user_id, sigs in device_info["signatures"].items(): - device_info["keys"].setdefault("signatures", {}) \ - .setdefault(sig_user_id, {}) \ - .update(sigs) + device_info["keys"].setdefault("signatures", {}).setdefault( + sig_user_id, {} + ).update(sigs) return results @@ -131,12 +131,8 @@ class EndToEndKeyWorkerStore(SQLBaseStore): # get signatures on the device signature_sql = ( - "SELECT * " - " FROM e2e_cross_signing_signatures " - " WHERE %s" - ) % ( - " OR ".join("(" + q + ")" for q in signature_query_clauses) - ) + "SELECT * " " FROM e2e_cross_signing_signatures " " WHERE %s" + ) % (" OR ".join("(" + q + ")" for q in signature_query_clauses)) txn.execute(signature_sql, signature_query_params) rows = self.cursor_to_dict(txn) @@ -144,12 +140,10 @@ class EndToEndKeyWorkerStore(SQLBaseStore): for row in rows: target_user_id = row["target_user_id"] target_device_id = row["target_device_id"] - if target_user_id in result \ - and target_device_id in result[target_user_id]: - result[target_user_id][target_device_id] \ - .setdefault("signatures", {}) \ - .setdefault(row["user_id"], {})[row["key_id"]] \ - = row["signature"] + if target_user_id in result and target_device_id in result[target_user_id]: + result[target_user_id][target_device_id].setdefault( + "signatures", {} + ).setdefault(row["user_id"], {})[row["key_id"]] = row["signature"] log_kv(result) return result diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index b1d3a4cfae..8c0ee3f7d3 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -225,20 +225,11 @@ class E2eKeysHandlerTestCase(unittest.TestCase): "user_id": local_user, "device_id": device_id, "algorithms": ["m.olm.curve25519-aes-sha256", "m.megolm.v1.aes-sha"], - "keys": { - "curve25519:xyz": "curve25519+key", - "ed25519:xyz": device_pubkey - }, - "signatures": { - local_user: { - "ed25519:xyz": "something" - } - } + "keys": {"curve25519:xyz": "curve25519+key", "ed25519:xyz": device_pubkey}, + "signatures": {local_user: {"ed25519:xyz": "something"}}, } device_signing_key = key.decode_signing_key_base64( - "ed25519", - "xyz", - "OMkooTr76ega06xNvXIGPbgvvxAOzmQncN8VObS7aBA" + "ed25519", "xyz", "OMkooTr76ega06xNvXIGPbgvvxAOzmQncN8VObS7aBA" ) yield self.handler.upload_keys_for_user( @@ -250,26 +241,20 @@ class E2eKeysHandlerTestCase(unittest.TestCase): master_key = { "user_id": local_user, "usage": ["master"], - "keys": { - "ed25519:" + master_pubkey: master_pubkey - } + "keys": {"ed25519:" + master_pubkey: master_pubkey}, } master_signing_key = key.decode_signing_key_base64( - "ed25519", master_pubkey, - "2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0" + "ed25519", 
master_pubkey, "2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0" ) usersigning_pubkey = "Hq6gL+utB4ET+UvD5ci0kgAwsX6qP/zvf8v6OInU5iw" usersigning_key = { # private key: 4TL4AjRYwDVwD3pqQzcor+ez/euOB1/q78aTJ+czDNs "user_id": local_user, "usage": ["user_signing"], - "keys": { - "ed25519:" + usersigning_pubkey: usersigning_pubkey, - } + "keys": {"ed25519:" + usersigning_pubkey: usersigning_pubkey}, } usersigning_signing_key = key.decode_signing_key_base64( - "ed25519", usersigning_pubkey, - "4TL4AjRYwDVwD3pqQzcor+ez/euOB1/q78aTJ+czDNs" + "ed25519", usersigning_pubkey, "4TL4AjRYwDVwD3pqQzcor+ez/euOB1/q78aTJ+czDNs" ) sign.sign_json(usersigning_key, local_user, master_signing_key) # private key: HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8 @@ -277,13 +262,10 @@ class E2eKeysHandlerTestCase(unittest.TestCase): selfsigning_key = { "user_id": local_user, "usage": ["self_signing"], - "keys": { - "ed25519:" + selfsigning_pubkey: selfsigning_pubkey, - } + "keys": {"ed25519:" + selfsigning_pubkey: selfsigning_pubkey}, } selfsigning_signing_key = key.decode_signing_key_base64( - "ed25519", selfsigning_pubkey, - "HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8" + "ed25519", selfsigning_pubkey, "HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8" ) sign.sign_json(selfsigning_key, local_user, master_signing_key) cross_signing_keys = { @@ -301,13 +283,11 @@ class E2eKeysHandlerTestCase(unittest.TestCase): # private key: oyw2ZUx0O4GifbfFYM0nQvj9CL0b8B7cyN4FprtK8OI "user_id": other_user, "usage": ["master"], - "keys": { - "ed25519:" + other_master_pubkey: other_master_pubkey - } + "keys": {"ed25519:" + other_master_pubkey: other_master_pubkey}, } - yield self.handler.upload_signing_keys_for_user(other_user, { - "master_key": other_master_key - }) + yield self.handler.upload_signing_keys_for_user( + other_user, {"master_key": other_master_key} + ) # test various signature failures (see below) ret = yield self.handler.upload_signatures_for_device_keys( @@ -319,17 +299,18 @@ class E2eKeysHandlerTestCase(unittest.TestCase): device_id: { "user_id": local_user, "device_id": device_id, - "algorithms": ["m.olm.curve25519-aes-sha256", "m.megolm.v1.aes-sha"], + "algorithms": [ + "m.olm.curve25519-aes-sha256", + "m.megolm.v1.aes-sha", + ], "keys": { "curve25519:xyz": "curve25519+key", # private key: OMkooTr76ega06xNvXIGPbgvvxAOzmQncN8VObS7aBA - "ed25519:xyz": device_pubkey + "ed25519:xyz": device_pubkey, }, "signatures": { - local_user: { - "ed25519:" + selfsigning_pubkey: "something", - } - } + local_user: {"ed25519:" + selfsigning_pubkey: "something"} + }, }, # fails because device is unknown # should fail with NOT_FOUND @@ -337,25 +318,19 @@ class E2eKeysHandlerTestCase(unittest.TestCase): "user_id": local_user, "device_id": "unknown", "signatures": { - local_user: { - "ed25519:" + selfsigning_pubkey: "something", - } - } + local_user: {"ed25519:" + selfsigning_pubkey: "something"} + }, }, # fails because the signature is invalid # should fail with INVALID_SIGNATURE master_pubkey: { "user_id": local_user, "usage": ["master"], - "keys": { - "ed25519:" + master_pubkey: master_pubkey - }, + "keys": {"ed25519:" + master_pubkey: master_pubkey}, "signatures": { - local_user: { - "ed25519:" + device_pubkey: "something", - } - } - } + local_user: {"ed25519:" + device_pubkey: "something"} + }, + }, }, other_user: { # fails because the device is not the user's master-signing key @@ -364,38 +339,40 @@ class E2eKeysHandlerTestCase(unittest.TestCase): "user_id": other_user, "device_id": "unknown", "signatures": { - local_user: { - "ed25519:" + 
usersigning_pubkey: "something", - } - } + local_user: {"ed25519:" + usersigning_pubkey: "something"} + }, }, other_master_pubkey: { # fails because the key doesn't match what the server has # should fail with UNKNOWN "user_id": other_user, "usage": ["master"], - "keys": { - "ed25519:" + other_master_pubkey: other_master_pubkey - }, + "keys": {"ed25519:" + other_master_pubkey: other_master_pubkey}, "something": "random", "signatures": { - local_user: { - "ed25519:" + usersigning_pubkey: "something", - } - } - } - } - } + local_user: {"ed25519:" + usersigning_pubkey: "something"} + }, + }, + }, + }, ) user_failures = ret["failures"][local_user] - self.assertEqual(user_failures[device_id]["errcode"], errors.Codes.INVALID_SIGNATURE) - self.assertEqual(user_failures[master_pubkey]["errcode"], errors.Codes.INVALID_SIGNATURE) + self.assertEqual( + user_failures[device_id]["errcode"], errors.Codes.INVALID_SIGNATURE + ) + self.assertEqual( + user_failures[master_pubkey]["errcode"], errors.Codes.INVALID_SIGNATURE + ) self.assertEqual(user_failures["unknown"]["errcode"], errors.Codes.NOT_FOUND) other_user_failures = ret["failures"][other_user] - self.assertEqual(other_user_failures["unknown"]["errcode"], errors.Codes.NOT_FOUND) - self.assertEqual(other_user_failures[other_master_pubkey]["errcode"], errors.Codes.UNKNOWN) + self.assertEqual( + other_user_failures["unknown"]["errcode"], errors.Codes.NOT_FOUND + ) + self.assertEqual( + other_user_failures[other_master_pubkey]["errcode"], errors.Codes.UNKNOWN + ) # test successful signatures del device_key["signatures"] @@ -405,33 +382,33 @@ class E2eKeysHandlerTestCase(unittest.TestCase): ret = yield self.handler.upload_signatures_for_device_keys( local_user, { - local_user: { - device_id: device_key, - master_pubkey: master_key - }, - other_user: { - other_master_pubkey: other_master_key - } - } + local_user: {device_id: device_key, master_pubkey: master_key}, + other_user: {other_master_pubkey: other_master_key}, + }, ) self.assertEqual(ret["failures"], {}) # fetch the signed keys/devices and make sure that the signatures are there ret = yield self.handler.query_devices( - {"device_keys": {local_user: [], other_user: []}}, - 0, local_user + {"device_keys": {local_user: [], other_user: []}}, 0, local_user ) self.assertEqual( - ret["device_keys"][local_user]["xyz"]["signatures"][local_user]["ed25519:" + selfsigning_pubkey], - device_key["signatures"][local_user]["ed25519:" + selfsigning_pubkey] + ret["device_keys"][local_user]["xyz"]["signatures"][local_user][ + "ed25519:" + selfsigning_pubkey + ], + device_key["signatures"][local_user]["ed25519:" + selfsigning_pubkey], ) self.assertEqual( - ret["master_keys"][local_user]["signatures"][local_user]["ed25519:" + device_id], - master_key["signatures"][local_user]["ed25519:" + device_id] + ret["master_keys"][local_user]["signatures"][local_user][ + "ed25519:" + device_id + ], + master_key["signatures"][local_user]["ed25519:" + device_id], ) self.assertEqual( - ret["master_keys"][other_user]["signatures"][local_user]["ed25519:" + usersigning_pubkey], - other_master_key["signatures"][local_user]["ed25519:" + usersigning_pubkey] + ret["master_keys"][other_user]["signatures"][local_user][ + "ed25519:" + usersigning_pubkey + ], + other_master_key["signatures"][local_user]["ed25519:" + usersigning_pubkey], ) -- cgit 1.4.1 From 9061b4198af4b30bb99d98aab7ad227f8ed636f8 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Mon, 22 Jul 2019 12:58:04 -0400 Subject: make isort happy --- tests/handlers/test_e2e_keys.py | 1 
+ 1 file changed, 1 insertion(+) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 8c0ee3f7d3..c900451e03 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -19,6 +19,7 @@ import mock import signedjson.key as key import signedjson.sign as sign + from twisted.internet import defer import synapse.handlers.e2e_keys -- cgit 1.4.1 From 5914fd09c725342d03f702a50ec1da6290e946a9 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Mon, 22 Jul 2019 13:01:10 -0400 Subject: add test --- tests/handlers/test_e2e_keys.py | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index c900451e03..316dd6259d 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -183,6 +183,94 @@ class E2eKeysHandlerTestCase(unittest.TestCase): ) self.assertDictEqual(devices["master_keys"], {local_user: keys2["master_key"]}) + @defer.inlineCallbacks + def test_reupload_signatures(self): + """re-uploading a signature should not fail""" + local_user = "@boris:" + self.hs.hostname + keys1 = { + "master_key": { + # private key: HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8 + "user_id": local_user, + "usage": ["master"], + "keys": { + "ed25519:EmkqvokUn8p+vQAGZitOk4PWjp7Ukp3txV2TbMPEiBQ": "EmkqvokUn8p+vQAGZitOk4PWjp7Ukp3txV2TbMPEiBQ" + }, + }, + "self_signing_key": { + # private key: 2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0 + "user_id": local_user, + "usage": ["self_signing"], + "keys": { + "ed25519:nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk": "nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk" + }, + }, + } + master_signing_key = key.decode_signing_key_base64( + "ed25519", + "EmkqvokUn8p+vQAGZitOk4PWjp7Ukp3txV2TbMPEiBQ", + "HvQBbU+hc2Zr+JP1sE0XwBe1pfZZEYtJNPJLZJtS+F8", + ) + sign.sign_json(keys1["self_signing_key"], local_user, master_signing_key) + signing_key = key.decode_signing_key_base64( + "ed25519", + "nqOvzeuGWT/sRx3h7+MHoInYj3Uk2LD/unI9kDYcHwk", + "2lonYOM6xYKdEsO+6KrC766xBcHnYnim1x/4LFGF8B0", + ) + yield self.handler.upload_signing_keys_for_user(local_user, keys1) + + # upload two device keys, which will be signed later by the self-signing key + device_key_1 = { + "user_id": local_user, + "device_id": "abc", + "algorithms": ["m.olm.curve25519-aes-sha256", "m.megolm.v1.aes-sha"], + "keys": { + "ed25519:abc": "base64+ed25519+key", + "curve25519:abc": "base64+curve25519+key", + }, + "signatures": {local_user: {"ed25519:abc": "base64+signature"}}, + } + device_key_2 = { + "user_id": local_user, + "device_id": "def", + "algorithms": ["m.olm.curve25519-aes-sha256", "m.megolm.v1.aes-sha"], + "keys": { + "ed25519:def": "base64+ed25519+key", + "curve25519:def": "base64+curve25519+key", + }, + "signatures": {local_user: {"ed25519:def": "base64+signature"}}, + } + + yield self.handler.upload_keys_for_user( + local_user, "abc", {"device_keys": device_key_1} + ) + yield self.handler.upload_keys_for_user( + local_user, "def", {"device_keys": device_key_2} + ) + + # sign the first device key and upload it + del device_key_1["signatures"] + sign.sign_json(device_key_1, local_user, signing_key) + yield self.handler.upload_signatures_for_device_keys( + local_user, {local_user: {"abc": device_key_1}} + ) + + # sign the second device key and upload both device keys. 
The server + # should ignore the first device key since it already has a valid + # signature for it + del device_key_2["signatures"] + sign.sign_json(device_key_2, local_user, signing_key) + yield self.handler.upload_signatures_for_device_keys( + local_user, {local_user: {"abc": device_key_1, "def": device_key_2}} + ) + + device_key_1["signatures"][local_user]["ed25519:abc"] = "base64+signature" + device_key_2["signatures"][local_user]["ed25519:def"] = "base64+signature" + devices = yield self.handler.query_devices({"device_keys": {local_user: []}}, 0) + del devices["device_keys"][local_user]["abc"]["unsigned"] + del devices["device_keys"][local_user]["def"]["unsigned"] + self.assertDictEqual(devices["device_keys"][local_user]["abc"], device_key_1) + self.assertDictEqual(devices["device_keys"][local_user]["def"], device_key_2) + @defer.inlineCallbacks def test_self_signing_key_doesnt_show_up_as_device(self): """signing keys should be hidden when fetching a user's devices""" -- cgit 1.4.1 From c8dc740a94f20c0bca9aaa30b9d0fd211361a21e Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 4 Sep 2019 22:30:45 -0400 Subject: update with newer coding style --- synapse/handlers/e2e_keys.py | 2 +- synapse/rest/client/v2_alpha/keys.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 74bceddc46..d5d6e6e027 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -851,7 +851,7 @@ class E2eKeysHandler(object): user_id, signed_users ) - defer.returnValue({"failures": failures}) + return {"failures": failures} @defer.inlineCallbacks def _get_e2e_cross_signing_verify_key(self, user_id, key_type, from_user_id=None): diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index a205281830..341567ae21 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -274,7 +274,7 @@ class SigningKeyUploadServlet(RestServlet): ) result = yield self.e2e_keys_handler.upload_signing_keys_for_user(user_id, body) - return (200, result) + return 200, result class SignaturesUploadServlet(RestServlet): @@ -324,7 +324,7 @@ class SignaturesUploadServlet(RestServlet): result = yield self.e2e_keys_handler.upload_signatures_for_device_keys( user_id, body ) - defer.returnValue((200, result)) + return 200, result def register_servlets(hs, http_server): -- cgit 1.4.1 From 90d17a3d28d7a87fe1231db3726759339c914753 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 5 Sep 2019 14:00:30 +0100 Subject: Add POST /_matrix/client/r0/account/3pid/unbind (MSC2140) (#5980) Implements `POST /_matrix/client/r0/account/3pid/unbind` from [MSC2140](https://github.com/matrix-org/matrix-doc/blob/dbkr/tos_2/proposals/2140-terms-of-service-2.md#post-_matrixclientr0account3pidunbind). --- changelog.d/5980.feature | 1 + synapse/handlers/identity.py | 3 ++- synapse/rest/client/v2_alpha/account.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5980.feature diff --git a/changelog.d/5980.feature b/changelog.d/5980.feature new file mode 100644 index 0000000000..f25d8d81d9 --- /dev/null +++ b/changelog.d/5980.feature @@ -0,0 +1 @@ +Add POST /_matrix/client/r0/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account. 
\ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index d199521b58..5540f9f4d5 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -137,7 +137,8 @@ class IdentityHandler(BaseHandler): @defer.inlineCallbacks def try_unbind_threepid(self, mxid, threepid): - """Removes a binding from an identity server + """Attempt to remove a 3PID from an identity server or, if one is not provided, from all + identity servers on which we're aware the binding is present Args: mxid (str): Matrix user ID of binding to be removed diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 0620a4d0cf..a4be518006 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -571,6 +571,38 @@ class ThreepidRestServlet(RestServlet): return 200, {} +class ThreepidUnbindRestServlet(RestServlet): + PATTERNS = client_patterns("/account/3pid/unbind$") + + def __init__(self, hs): + super(ThreepidUnbindRestServlet, self).__init__() + self.hs = hs + self.identity_handler = hs.get_handlers().identity_handler + self.auth = hs.get_auth() + self.datastore = self.hs.get_datastore() + + @defer.inlineCallbacks + def on_POST(self, request): + """Unbind the given 3pid from a specific identity server, or identity servers that are + known to have this 3pid bound + """ + requester = yield self.auth.get_user_by_req(request) + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ["medium", "address"]) + + medium = body.get("medium") + address = body.get("address") + id_server = body.get("id_server") + + # Attempt to unbind the threepid from an identity server. If id_server is None, try to + # unbind from all identity servers this threepid has been added to in the past + result = yield self.identity_handler.try_unbind_threepid( + requester.user.to_string(), + {"address": address, "medium": medium, "id_server": id_server}, + ) + return 200, {"id_server_unbind_result": "success" if result else "no-support"} + + class ThreepidDeleteRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/delete$") @@ -629,5 +661,6 @@ def register_servlets(hs, http_server): EmailThreepidRequestTokenRestServlet(hs).register(http_server) MsisdnThreepidRequestTokenRestServlet(hs).register(http_server) ThreepidRestServlet(hs).register(http_server) + ThreepidUnbindRestServlet(hs).register(http_server) ThreepidDeleteRestServlet(hs).register(http_server) WhoamiRestServlet(hs).register(http_server) -- cgit 1.4.1 From b9cfd3c375c551902093b0dac1df9e0b4d6759cc Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 14:22:15 +0100 Subject: Fix opentracing contexts missing from outbound replication requests (#5982) --- changelog.d/5982.bugfix | 1 + synapse/logging/opentracing.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 changelog.d/5982.bugfix diff --git a/changelog.d/5982.bugfix b/changelog.d/5982.bugfix new file mode 100644 index 0000000000..3ea281a3a0 --- /dev/null +++ b/changelog.d/5982.bugfix @@ -0,0 +1 @@ +Include missing opentracing contexts in outbound replication requests.
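The diff below makes the check_destination flag effective: before this fix, inject_active_span_byte_dict filtered on whitelisted_homeserver unconditionally, so span contexts on worker-to-worker replication requests (whose destinations are never in the opentracing homeserver whitelist) were silently dropped. A minimal sketch of the intended call pattern, assuming a homeserver with opentracing configured and an active span; the caller function and header values here are illustrative, not Synapse's actual replication client:

from synapse.logging.opentracing import inject_active_span_byte_dict

def add_replication_trace_headers(headers, other_worker):
    # Replication traffic stays inside one homeserver, so other_worker will
    # never match the opentracing homeserver whitelist. Passing
    # check_destination=False skips that check and always injects the
    # active span context into the outbound headers.
    inject_active_span_byte_dict(headers, other_worker, check_destination=False)

headers = {b"User-Agent": [b"Synapse replication client"]}
add_replication_trace_headers(headers, "worker1.example.com")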
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 256b972aaa..dbf80e2024 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -493,6 +493,11 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T Args: headers (twisted.web.http_headers.Headers) + destination (str): address of entity receiving the span context. If check_destination + is true the context will only be injected if the destination matches the + opentracing whitelist + check_destination (bool): If false, destination will be ignored and the context + will always be injected. span (opentracing.Span) Returns: @@ -525,6 +530,11 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): Args: headers (dict) + destination (str): address of entity receiving the span context. If check_destination + is true the context will only be injected if the destination matches the + opentracing whitelist + check_destination (bool): If false, destination will be ignored and the context + will always be injected. span (opentracing.Span) Returns: @@ -537,7 +547,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): here: https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py """ - if not whitelisted_homeserver(destination): + if check_destination and not whitelisted_homeserver(destination): return span = opentracing.tracer.active_span @@ -556,9 +566,11 @@ def inject_active_span_text_map(carrier, destination, check_destination=True): Args: carrier (dict) - destination (str): the name of the remote server. The span context - will only be injected if the destination matches the homeserver_whitelist - or destination is None. + destination (str): address of entity receiving the span context. If check_destination + is true the context will only be injected if the destination matches the + opentracing whitelist + check_destination (bool): If false, destination will be ignored and the context + will always be injected. Returns: In-place modification of carrier -- cgit 1.4.1 From a0d294c306d2e345bb53078791858c41f3101424 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 5 Sep 2019 14:31:22 +0100 Subject: Switch to using v2 Identity Service APIs other than lookup (MSC 2140) (#5892) --- changelog.d/5892.misc | 1 + contrib/cmdclient/console.py | 5 + synapse/handlers/identity.py | 160 ++++++++++++++++++++++++-------- synapse/rest/client/v2_alpha/account.py | 13 +-- 4 files changed, 132 insertions(+), 47 deletions(-) create mode 100644 changelog.d/5892.misc diff --git a/changelog.d/5892.misc b/changelog.d/5892.misc new file mode 100644 index 0000000000..939fe8c655 --- /dev/null +++ b/changelog.d/5892.misc @@ -0,0 +1 @@ +Compatibility with v2 Identity Service APIs other than /lookup. 
\ No newline at end of file
diff --git a/contrib/cmdclient/console.py b/contrib/cmdclient/console.py
index af8f39c8c2..899c650b0c 100755
--- a/contrib/cmdclient/console.py
+++ b/contrib/cmdclient/console.py
@@ -268,6 +268,7 @@ class SynapseCmd(cmd.Cmd):
     @defer.inlineCallbacks
     def _do_emailrequest(self, args):
+        # TODO: Update to use v2 Identity Service API endpoint
         url = (
             self._identityServerUrl()
             + "/_matrix/identity/api/v1/validate/email/requestToken"
@@ -302,6 +303,7 @@ class SynapseCmd(cmd.Cmd):
     @defer.inlineCallbacks
     def _do_emailvalidate(self, args):
+        # TODO: Update to use v2 Identity Service API endpoint
         url = (
             self._identityServerUrl()
             + "/_matrix/identity/api/v1/validate/email/submitToken"
@@ -330,6 +332,7 @@ class SynapseCmd(cmd.Cmd):
     @defer.inlineCallbacks
     def _do_3pidbind(self, args):
+        # TODO: Update to use v2 Identity Service API endpoint
         url = self._identityServerUrl() + "/_matrix/identity/api/v1/3pid/bind"
         json_res = yield self.http_client.do_request(
@@ -398,6 +401,7 @@ class SynapseCmd(cmd.Cmd):
     @defer.inlineCallbacks
     def _do_invite(self, roomid, userstring):
         if not userstring.startswith("@") and self._is_on("complete_usernames"):
+            # TODO: Update to use v2 Identity Service API endpoint
             url = self._identityServerUrl() + "/_matrix/identity/api/v1/lookup"
             json_res = yield self.http_client.do_request(
@@ -407,6 +411,7 @@ class SynapseCmd(cmd.Cmd):
         mxid = None
         if "mxid" in json_res and "signatures" in json_res:
+            # TODO: Update to use v2 Identity Service API endpoint
             url = (
                 self._identityServerUrl()
                 + "/_matrix/identity/api/v1/pubkey/ed25519"
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 5540f9f4d5..583b612dd9 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -61,21 +61,76 @@ class IdentityHandler(BaseHandler):
                 return False
         return True
+    def _extract_items_from_creds_dict(self, creds):
+        """
+        Retrieve entries from a "credentials" dictionary
+
+        Args:
+            creds (dict[str, str]): Dictionary of credentials that contain the following keys:
+                * client_secret|clientSecret: A unique secret str provided by the client
+                * id_server|idServer: the domain of the identity server to query
+                * id_access_token: The access token to authenticate to the identity
+                    server with.
+
+        Returns:
+            tuple(str, str, str|None): A tuple containing the client_secret, the id_server,
+                and the id_access_token value if available.
+        """
+        client_secret = creds.get("client_secret") or creds.get("clientSecret")
+        if not client_secret:
+            raise SynapseError(
+                400, "No client_secret in creds", errcode=Codes.MISSING_PARAM
+            )
+
+        id_server = creds.get("id_server") or creds.get("idServer")
+        if not id_server:
+            raise SynapseError(
+                400, "No id_server in creds", errcode=Codes.MISSING_PARAM
+            )
+
+        id_access_token = creds.get("id_access_token")
+        return client_secret, id_server, id_access_token
+
     @defer.inlineCallbacks
-    def threepid_from_creds(self, creds):
-        if "id_server" in creds:
-            id_server = creds["id_server"]
-        elif "idServer" in creds:
-            id_server = creds["idServer"]
-        else:
-            raise SynapseError(400, "No id_server in creds")
+    def threepid_from_creds(self, creds, use_v2=True):
+        """
+        Retrieve and validate a threepid identifier from a "credentials" dictionary
+
+        Args:
+            creds (dict[str, str]): Dictionary of credentials that contain the following keys:
+                * client_secret|clientSecret: A unique secret str provided by the client
+                * id_server|idServer: the domain of the identity server to query
+                * id_access_token: The access token to authenticate to the identity
+                    server with. Required if use_v2 is true
+            use_v2 (bool): Whether to use v2 Identity Service API endpoints
+
+        Returns:
+            Deferred[dict[str,str|int]|None]: A dictionary consisting of response params to
+                the /getValidated3pid endpoint of the Identity Service API, or None if the
+                threepid was not found
+        """
+        client_secret, id_server, id_access_token = self._extract_items_from_creds_dict(
+            creds
+        )
-        if "client_secret" in creds:
-            client_secret = creds["client_secret"]
-        elif "clientSecret" in creds:
-            client_secret = creds["clientSecret"]
+        # If an id_access_token is not supplied, force usage of v1
+        if id_access_token is None:
+            use_v2 = False
+
+        query_params = {"sid": creds["sid"], "client_secret": client_secret}
+
+        # Decide which API endpoint URLs and query parameters to use
+        if use_v2:
+            url = "https://%s%s" % (
+                id_server,
+                "/_matrix/identity/v2/3pid/getValidated3pid",
+            )
+            query_params["id_access_token"] = id_access_token
         else:
-            raise SynapseError(400, "No client_secret in creds")
+            url = "https://%s%s" % (
+                id_server,
+                "/_matrix/identity/api/v1/3pid/getValidated3pid",
+            )
         if not self._should_trust_id_server(id_server):
             logger.warn(
@@ -85,43 +140,55 @@ class IdentityHandler(BaseHandler):
             return None
         try:
-            data = yield self.http_client.get_json(
-                "https://%s%s"
-                % (id_server, "/_matrix/identity/api/v1/3pid/getValidated3pid"),
-                {"sid": creds["sid"], "client_secret": client_secret},
-            )
+            data = yield self.http_client.get_json(url, query_params)
+            return data if "medium" in data else None
         except HttpResponseException as e:
-            logger.info("getValidated3pid failed with Matrix error: %r", e)
-            raise e.to_synapse_error()
+            if e.code != 404 or not use_v2:
+                # Generic failure
+                logger.info("getValidated3pid failed with Matrix error: %r", e)
+                raise e.to_synapse_error()
-        if "medium" in data:
-            return data
-        return None
+            # This identity server is too old to understand Identity Service API v2
+            # Attempt v1 endpoint
+            logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", url)
+            return (yield self.threepid_from_creds(creds, use_v2=False))
     @defer.inlineCallbacks
-    def bind_threepid(self, creds, mxid):
+    def bind_threepid(self, creds, mxid, use_v2=True):
+        """Bind a 3PID to an identity server
+
+        Args:
+            creds (dict[str, str]): Dictionary of credentials that contain the following keys:
+                * client_secret|clientSecret: A unique secret
str provided by the client + * id_server|idServer: the domain of the identity server to query + * id_access_token: The access token to authenticate to the identity + server with. Required if use_v2 is true + mxid (str): The MXID to bind the 3PID to + use_v2 (bool): Whether to use v2 Identity Service API endpoints + + Returns: + Deferred[dict]: The response from the identity server + """ logger.debug("binding threepid %r to %s", creds, mxid) - data = None - if "id_server" in creds: - id_server = creds["id_server"] - elif "idServer" in creds: - id_server = creds["idServer"] - else: - raise SynapseError(400, "No id_server in creds") + client_secret, id_server, id_access_token = self._extract_items_from_creds_dict( + creds + ) + + # If an id_access_token is not supplied, force usage of v1 + if id_access_token is None: + use_v2 = False - if "client_secret" in creds: - client_secret = creds["client_secret"] - elif "clientSecret" in creds: - client_secret = creds["clientSecret"] + # Decide which API endpoint URLs to use + bind_data = {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid} + if use_v2: + bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) + bind_data["id_access_token"] = id_access_token else: - raise SynapseError(400, "No client_secret in creds") + bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) try: - data = yield self.http_client.post_json_get_json( - "https://%s%s" % (id_server, "/_matrix/identity/api/v1/3pid/bind"), - {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid}, - ) + data = yield self.http_client.post_json_get_json(bind_url, bind_data) logger.debug("bound threepid %r to %s", creds, mxid) # Remember where we bound the threepid @@ -131,9 +198,18 @@ class IdentityHandler(BaseHandler): address=data["address"], id_server=id_server, ) + + return data + except HttpResponseException as e: + if e.code != 404 or not use_v2: + logger.error("3PID bind failed with Matrix error: %r", e) + raise e.to_synapse_error() except CodeMessageException as e: data = json.loads(e.msg) # XXX WAT? 
- return data + return data + + logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url) + return (yield self.bind_threepid(creds, mxid, use_v2=False)) @defer.inlineCallbacks def try_unbind_threepid(self, mxid, threepid): @@ -189,6 +265,8 @@ class IdentityHandler(BaseHandler): server doesn't support unbinding """ url = "https://%s/_matrix/identity/api/v1/3pid/unbind" % (id_server,) + url_bytes = "/_matrix/identity/api/v1/3pid/unbind".encode("ascii") + content = { "mxid": mxid, "threepid": {"medium": threepid["medium"], "address": threepid["address"]}, @@ -200,7 +278,7 @@ class IdentityHandler(BaseHandler): auth_headers = self.federation_http_client.build_auth_headers( destination=None, method="POST", - url_bytes="/_matrix/identity/api/v1/3pid/unbind".encode("ascii"), + url_bytes=url_bytes, content=content, destination_is=id_server, ) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index a4be518006..e9cc953bdd 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -542,15 +542,16 @@ class ThreepidRestServlet(RestServlet): def on_POST(self, request): body = parse_json_object_from_request(request) - threePidCreds = body.get("threePidCreds") - threePidCreds = body.get("three_pid_creds", threePidCreds) - if threePidCreds is None: - raise SynapseError(400, "Missing param", Codes.MISSING_PARAM) + threepid_creds = body.get("threePidCreds") or body.get("three_pid_creds") + if threepid_creds is None: + raise SynapseError( + 400, "Missing param three_pid_creds", Codes.MISSING_PARAM + ) requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - threepid = yield self.identity_handler.threepid_from_creds(threePidCreds) + threepid = yield self.identity_handler.threepid_from_creds(threepid_creds) if not threepid: raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED) @@ -566,7 +567,7 @@ class ThreepidRestServlet(RestServlet): if "bind" in body and body["bind"]: logger.debug("Binding threepid %s to %s", threepid, user_id) - yield self.identity_handler.bind_threepid(threePidCreds, user_id) + yield self.identity_handler.bind_threepid(threepid_creds, user_id) return 200, {} -- cgit 1.4.1 From 1d65292e94077390af0ad9c5ee8cd8b0db9b357c Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 14:41:04 +0100 Subject: Link the send loop with the edus contexts The contexts were being filtered too early so the send loop wasn't being linked to them unless the destination was whitelisted. --- synapse/federation/sender/transaction_manager.py | 11 ++++++++--- synapse/federation/units.py | 3 +++ synapse/handlers/devicemessage.py | 5 +---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 62ca6a3e87..42f46394bc 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -26,6 +26,7 @@ from synapse.logging.opentracing import ( set_tag, start_active_span_follows_from, tags, + whitelisted_homeserver, ) from synapse.util.metrics import measure_func @@ -59,9 +60,13 @@ class TransactionManager(object): # The span_contexts is a generator so that it won't be evaluated if # opentracing is disabled. (Yay speed!) 
- span_contexts = ( - extract_text_map(json.loads(edu.get_context())) for edu in pending_edus - ) + span_contexts = [] + keep_destination = whitelisted_homeserver(destination) + + for edu in pending_edus: + span_contexts.append(extract_text_map(json.loads(edu.get_context()))) + if keep_destination: + edu.strip_context() with start_active_span_follows_from("send_transaction", span_contexts): diff --git a/synapse/federation/units.py b/synapse/federation/units.py index aa84621206..b4d743cde7 100644 --- a/synapse/federation/units.py +++ b/synapse/federation/units.py @@ -41,6 +41,9 @@ class Edu(JsonEncodedObject): def get_context(self): return getattr(self, "content", {}).get("org.matrix.opentracing_context", "{}") + def strip_context(self): + getattr(self, "content", {})["org.matrix.opentracing_context"] = "{}" + class Transaction(JsonEncodedObject): """ A transaction is a list of Pdus and Edus to be sent to a remote home diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index 01731cb2d0..0043cbea17 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -25,7 +25,6 @@ from synapse.logging.opentracing import ( log_kv, set_tag, start_active_span, - whitelisted_homeserver, ) from synapse.types import UserID, get_domain_from_id from synapse.util.stringutils import random_string @@ -121,9 +120,7 @@ class DeviceMessageHandler(object): "sender": sender_user_id, "type": message_type, "message_id": message_id, - "org.matrix.opentracing_context": json.dumps(context) - if whitelisted_homeserver(destination) - else None, + "org.matrix.opentracing_context": json.dumps(context), } log_kv({"local_messages": local_messages}) -- cgit 1.4.1 From 93bc9d73bfc3fafa1862ba0cc65fd31bfbb1add9 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 14:45:07 +0100 Subject: newsfile --- changelog.d/5984.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5984.bugfix diff --git a/changelog.d/5984.bugfix b/changelog.d/5984.bugfix new file mode 100644 index 0000000000..98dcfb188e --- /dev/null +++ b/changelog.d/5984.bugfix @@ -0,0 +1 @@ +Link send loop opentracing to edu contexts regardles of destination. -- cgit 1.4.1 From 909827b422eb3396f905a1fb7ad1732f9727d500 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 14:46:04 +0100 Subject: Add opentracing to all client servlets (#5983) --- changelog.d/5983.feature | 1 + synapse/federation/transport/server.py | 6 +++++- synapse/http/server.py | 13 ++++++++++++- synapse/http/servlet.py | 6 +----- synapse/logging/opentracing.py | 2 +- synapse/replication/http/_base.py | 16 ++++++---------- 6 files changed, 26 insertions(+), 18 deletions(-) create mode 100644 changelog.d/5983.feature diff --git a/changelog.d/5983.feature b/changelog.d/5983.feature new file mode 100644 index 0000000000..aa23ee6dcd --- /dev/null +++ b/changelog.d/5983.feature @@ -0,0 +1 @@ +Add minimum opentracing for client servlets. 
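The commit below centralises tracing in `JsonResource.register_paths`: every callback is wrapped with `trace_servlet` once at registration time, and callers that need different behaviour (federation and replication, which build or extract their own spans) opt out with `trace=False`. A self-contained toy illustrating the wrap-at-registration pattern, using stand-in names rather than real Synapse APIs:

    from functools import wraps

    def trace_handler(opname):
        # Stand-in for trace_servlet: returns a decorator that opens a "span"
        # (here just a pair of prints) around each call.
        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                print("start span:", opname)
                try:
                    return func(*args, **kwargs)
                finally:
                    print("finish span:", opname)
            return wrapped
        return decorator

    class Registry:
        def __init__(self):
            self.routes = {}

        def register(self, path, callback, name, trace=True):
            if trace:
                # Wrap once, centrally, so individual handlers no longer
                # need to decorate themselves.
                callback = trace_handler(name)(callback)
            self.routes[path] = callback

    registry = Registry()
    registry.register("/versions", lambda: {"versions": ["r0.5.0"]}, "VersionsServlet")
    registry.register("/replicate", lambda: "ok", "ReplicationEndpoint", trace=False)
    print(registry.routes["/versions"]())

The design point is that wrapping happens in one place, so a forgotten decorator can no longer leave a client servlet untraced.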
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index f9930b6460..132a8fb5e6 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -342,7 +342,11 @@ class BaseFederationServlet(object): continue server.register_paths( - method, (pattern,), self._wrap(code), self.__class__.__name__ + method, + (pattern,), + self._wrap(code), + self.__class__.__name__, + trace=False, ) diff --git a/synapse/http/server.py b/synapse/http/server.py index e6f351ba3b..cb9158fe1b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -40,6 +40,7 @@ from synapse.api.errors import ( UnrecognizedRequestError, ) from synapse.logging.context import preserve_fn +from synapse.logging.opentracing import trace_servlet from synapse.util.caches import intern_dict logger = logging.getLogger(__name__) @@ -257,7 +258,9 @@ class JsonResource(HttpServer, resource.Resource): self.path_regexs = {} self.hs = hs - def register_paths(self, method, path_patterns, callback, servlet_classname): + def register_paths( + self, method, path_patterns, callback, servlet_classname, trace=True + ): """ Registers a request handler against a regular expression. Later request URLs are checked against these regular expressions in order to identify an appropriate @@ -273,8 +276,16 @@ class JsonResource(HttpServer, resource.Resource): servlet_classname (str): The name of the handler to be used in prometheus and opentracing logs. + + trace (bool): Whether we should start a span to trace the servlet. """ method = method.encode("utf-8") # method is bytes on py3 + + if trace: + # We don't extract the context from the servlet because we can't + # trust the sender + callback = trace_servlet(servlet_classname)(callback) + for path_pattern in path_patterns: logger.debug("Registering for %s %s", method, path_pattern.pattern) self.path_regexs.setdefault(method, []).append( diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index c186b31f59..274c1a6a87 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -20,7 +20,6 @@ import logging from canonicaljson import json from synapse.api.errors import Codes, SynapseError -from synapse.logging.opentracing import trace_servlet logger = logging.getLogger(__name__) @@ -298,10 +297,7 @@ class RestServlet(object): servlet_classname = self.__class__.__name__ method_handler = getattr(self, "on_%s" % (method,)) http_server.register_paths( - method, - patterns, - trace_servlet(servlet_classname)(method_handler), - servlet_classname, + method, patterns, method_handler, servlet_classname ) else: diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index dbf80e2024..2c34b54702 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -319,7 +319,7 @@ def whitelisted_homeserver(destination): Args: destination (str) """ - _homeserver_whitelist + if _homeserver_whitelist: return _homeserver_whitelist.match(destination) return False diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index c4be9273f6..afc9a8ff29 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -22,13 +22,13 @@ from six.moves import urllib from twisted.internet import defer -import synapse.logging.opentracing as opentracing from synapse.api.errors import ( CodeMessageException, HttpResponseException, RequestSendFailed, SynapseError, ) +from synapse.logging.opentracing import inject_active_span_byte_dict, trace_servlet 
from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string @@ -167,9 +167,7 @@ class ReplicationEndpoint(object): # the master, and so whether we should clean up or not. while True: headers = {} - opentracing.inject_active_span_byte_dict( - headers, None, check_destination=False - ) + inject_active_span_byte_dict(headers, None, check_destination=False) try: result = yield request_func(uri, data, headers=headers) break @@ -210,13 +208,11 @@ class ReplicationEndpoint(object): args = "/".join("(?P<%s>[^/]+)" % (arg,) for arg in url_args) pattern = re.compile("^/_synapse/replication/%s/%s$" % (self.NAME, args)) + handler = trace_servlet(self.__class__.__name__, extract_context=True)(handler) + # We don't let register paths trace this servlet using the default tracing + # options because we wish to extract the context explicitly. http_server.register_paths( - method, - [pattern], - opentracing.trace_servlet(self.__class__.__name__, extract_context=True)( - handler - ), - self.__class__.__name__, + method, [pattern], handler, self.__class__.__name__, trace=False ) def _cached_handler(self, request, txn_id, **kwargs): -- cgit 1.4.1 From 5ade977d0836a9d7615b4e8cc578c48a26198ee8 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 15:06:13 +0100 Subject: Opentracing context cannot be none --- synapse/storage/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 41f62828bd..79a58df591 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -856,7 +856,7 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "ts": now, "opentracing_context": json.dumps(context) if whitelisted_homeserver(destination) - else None, + else "{}", } for destination in hosts for device_id in device_ids -- cgit 1.4.1 From 7093790fbc805162a5ebb36973aa52313aa1153b Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 15:07:00 +0100 Subject: Bugfix phrasing Co-Authored-By: Erik Johnston --- changelog.d/5984.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5984.bugfix b/changelog.d/5984.bugfix index 98dcfb188e..3387bf82bb 100644 --- a/changelog.d/5984.bugfix +++ b/changelog.d/5984.bugfix @@ -1 +1 @@ -Link send loop opentracing to edu contexts regardles of destination. +Fix sending of EDUs when opentracing is enabled with an empty whitelist. -- cgit 1.4.1 From ef20aa52ebb03b322e72a2fc4fefd21a373593a6 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 15:07:17 +0100 Subject: use access methods (duh..) 
Co-Authored-By: Erik Johnston --- synapse/federation/sender/transaction_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 42f46394bc..5b6c79c51a 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -64,7 +64,9 @@ class TransactionManager(object): keep_destination = whitelisted_homeserver(destination) for edu in pending_edus: - span_contexts.append(extract_text_map(json.loads(edu.get_context()))) + context = edu.get_context() + if context: + span_contexts.append(extract_text_map(json.loads(context))) if keep_destination: edu.strip_context() -- cgit 1.4.1 From 3ff0422d2dbfa668df365da99a4b7caeea85528d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Sep 2019 17:16:03 +0100 Subject: Make redaction retention period configurable --- docs/sample_config.yaml | 5 +++++ synapse/config/server.py | 15 +++++++++++++++ synapse/storage/events.py | 6 ++++-- tests/storage/test_redaction.py | 4 +++- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 43969bbb70..e23b80d2b8 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -306,6 +306,11 @@ listeners: # #allow_per_room_profiles: false +# How long to keep redacted events in unredacted form in the database. +# By default redactions are kept indefinitely. +# +#redaction_retention_period: 30d + ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index 2abdef0971..8efab924d4 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -162,6 +162,16 @@ class ServerConfig(Config): self.mau_trial_days = config.get("mau_trial_days", 0) + # How long to keep redacted events in the database in unredacted form + # before redacting them. + redaction_retention_period = config.get("redaction_retention_period") + if redaction_retention_period: + self.redaction_retention_period = self.parse_duration( + redaction_retention_period + ) + else: + self.redaction_retention_period = None + # Options to disable HS self.hs_disabled = config.get("hs_disabled", False) self.hs_disabled_message = config.get("hs_disabled_message", "") @@ -718,6 +728,11 @@ class ServerConfig(Config): # Defaults to 'true'. # #allow_per_room_profiles: false + + # How long to keep redacted events in unredacted form in the database. + # By default redactions are kept indefinitely. 
+ # + #redaction_retention_period: 30d """ % locals() ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2970da6829..d0d1781c90 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1566,10 +1566,12 @@ class EventsStore( Deferred """ - if self.stream_ordering_month_ago is None: + if not self.hs.config.redaction_retention_period: return - max_pos = self.stream_ordering_month_ago + max_pos = yield self.find_first_stream_ordering_after_ts( + self._clock.time_msec() - self.hs.config.redaction_retention_period + ) # We fetch all redactions that point to an event that we have that has # a stream ordering from over a month ago, that we haven't yet censored diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 0c9f3c7071..f0e86d41a8 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -344,7 +344,9 @@ class RedactionTestCase(unittest.HomeserverTestCase): {"content": {"body": "t", "msgtype": "message"}}, json.loads(event_json) ) - # Advance by 30 days + # Advance by 30 days, then advance again to ensure that the looping call + # for updating the stream position gets called and then the looping call + # for the censoring gets called. self.reactor.advance(60 * 60 * 24 * 31) self.reactor.advance(60 * 60 * 2) -- cgit 1.4.1 From ad9b64b4969537ac339469152eaa437bcf4b6609 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Sep 2019 17:17:47 +0100 Subject: Fix test --- tests/storage/test_redaction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index f0e86d41a8..deecfad9fb 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -31,8 +31,10 @@ from tests.utils import create_room class RedactionTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): + config = self.default_config() + config["redaction_retention_period"] = "30d" return self.setup_test_homeserver( - resource_for_federation=Mock(), http_client=None + resource_for_federation=Mock(), http_client=None, config=config ) def prepare(self, reactor, clock, hs): -- cgit 1.4.1 From bc604e7f9428a288816c284f6562ed08d2c4c540 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 17:33:29 +0100 Subject: Gracefully handle log context slips and missing opentracing import errors. (#5988) --- changelog.d/5988.bugfix | 1 + synapse/logging/opentracing.py | 82 +++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 24 deletions(-) create mode 100644 changelog.d/5988.bugfix diff --git a/changelog.d/5988.bugfix b/changelog.d/5988.bugfix new file mode 100644 index 0000000000..5c3597cb53 --- /dev/null +++ b/changelog.d/5988.bugfix @@ -0,0 +1 @@ +Fix invalid references to None while opentracing if the log context slips. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 2c34b54702..8c574ddd28 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -239,8 +239,7 @@ _homeserver_whitelist = None def only_if_tracing(func): - """Executes the function only if we're tracing. Otherwise return. - Assumes the function wrapped may return None""" + """Executes the function only if we're tracing. 
Otherwise returns None.""" @wraps(func) def _only_if_tracing_inner(*args, **kwargs): @@ -252,6 +251,41 @@ def only_if_tracing(func): return _only_if_tracing_inner +def ensure_active_span(message, ret=None): + """Executes the operation only if opentracing is enabled and there is an active span. + If there is no active span it logs message at the error level. + + Args: + message (str): Message which fills in "There was no active span when trying to %s" + in the error log if there is no active span and opentracing is enabled. + ret (object): return value if opentracing is None or there is no active span. + + Returns (object): The result of the func or ret if opentracing is disabled or there + was no active span. + """ + + def ensure_active_span_inner_1(func): + @wraps(func) + def ensure_active_span_inner_2(*args, **kwargs): + if not opentracing: + return ret + + if not opentracing.tracer.active_span: + logger.error( + "There was no active span when trying to %s." + " Did you forget to start one or did a context slip?", + message, + ) + + return ret + + return func(*args, **kwargs) + + return ensure_active_span_inner_2 + + return ensure_active_span_inner_1 + + @contextlib.contextmanager def _noop_context_manager(*args, **kwargs): """Does exactly what it says on the tin""" @@ -349,26 +383,24 @@ def start_active_span( if opentracing is None: return _noop_context_manager() - else: - # We need to enter the scope here for the logcontext to become active - return opentracing.tracer.start_active_span( - operation_name, - child_of=child_of, - references=references, - tags=tags, - start_time=start_time, - ignore_active_span=ignore_active_span, - finish_on_close=finish_on_close, - ) + return opentracing.tracer.start_active_span( + operation_name, + child_of=child_of, + references=references, + tags=tags, + start_time=start_time, + ignore_active_span=ignore_active_span, + finish_on_close=finish_on_close, + ) def start_active_span_follows_from(operation_name, contexts): if opentracing is None: return _noop_context_manager() - else: - references = [opentracing.follows_from(context) for context in contexts] - scope = start_active_span(operation_name, references=references) - return scope + + references = [opentracing.follows_from(context) for context in contexts] + scope = start_active_span(operation_name, references=references) + return scope def start_active_span_from_request( @@ -465,19 +497,19 @@ def start_active_span_from_edu( # Opentracing setters for tags, logs, etc -@only_if_tracing +@ensure_active_span("set a tag") def set_tag(key, value): """Sets a tag on the active span""" opentracing.tracer.active_span.set_tag(key, value) -@only_if_tracing +@ensure_active_span("log") def log_kv(key_values, timestamp=None): """Log to the active span""" opentracing.tracer.active_span.log_kv(key_values, timestamp) -@only_if_tracing +@ensure_active_span("set the traces operation name") def set_operation_name(operation_name): """Sets the operation name of the active span""" opentracing.tracer.active_span.set_operation_name(operation_name) @@ -486,7 +518,7 @@ def set_operation_name(operation_name): # Injection and extraction -@only_if_tracing +@ensure_active_span("inject the span into a header") def inject_active_span_twisted_headers(headers, destination, check_destination=True): """ Injects a span context into twisted headers in-place @@ -522,7 +554,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T headers.addRawHeaders(key, value) -@only_if_tracing +@ensure_active_span("inject the 
span into a byte dict") def inject_active_span_byte_dict(headers, destination, check_destination=True): """ Injects a span context into a dict where the headers are encoded as byte @@ -559,7 +591,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): headers[key.encode()] = [value.encode()] -@only_if_tracing +@ensure_active_span("inject the span into a text map") def inject_active_span_text_map(carrier, destination, check_destination=True): """ Injects a span context into a dict @@ -591,6 +623,7 @@ def inject_active_span_text_map(carrier, destination, check_destination=True): ) +@ensure_active_span("get the active span context as a dict", ret={}) def get_active_span_text_map(destination=None): """ Gets a span context as a dict. This can be used instead of manually @@ -603,7 +636,7 @@ def get_active_span_text_map(destination=None): dict: the active span's context if opentracing is enabled, otherwise empty. """ - if not opentracing or (destination and not whitelisted_homeserver(destination)): + if destination and not whitelisted_homeserver(destination): return {} carrier = {} @@ -614,6 +647,7 @@ def get_active_span_text_map(destination=None): return carrier +@ensure_active_span("get the span context as a string.", ret={}) def active_span_context_as_string(): """ Returns: -- cgit 1.4.1 From f7c873a64355506221648d55d64023c83d43f618 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 5 Sep 2019 17:44:55 +0100 Subject: Trace how long it takes for the send trasaction to complete, including retrys (#5986) --- changelog.d/5986.feature | 1 + synapse/http/client.py | 87 ++++++++++++++++++++-------------- synapse/http/matrixfederationclient.py | 1 - synapse/replication/http/_base.py | 7 ++- 4 files changed, 58 insertions(+), 38 deletions(-) create mode 100644 changelog.d/5986.feature diff --git a/changelog.d/5986.feature b/changelog.d/5986.feature new file mode 100644 index 0000000000..f56aec1b32 --- /dev/null +++ b/changelog.d/5986.feature @@ -0,0 +1 @@ +Trace replication send times. 
diff --git a/synapse/http/client.py b/synapse/http/client.py index 0ae6db8ea7..51765ae3c0 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -46,6 +46,7 @@ from synapse.http import ( redact_uri, ) from synapse.logging.context import make_deferred_yieldable +from synapse.logging.opentracing import set_tag, start_active_span, tags from synapse.util.async_helpers import timeout_deferred from synapse.util.caches import CACHE_SIZE_FACTOR @@ -269,42 +270,56 @@ class SimpleHttpClient(object): # log request but strip `access_token` (AS requests for example include this) logger.info("Sending request %s %s", method, redact_uri(uri)) - try: - body_producer = None - if data is not None: - body_producer = QuieterFileBodyProducer(BytesIO(data)) - - request_deferred = treq.request( - method, - uri, - agent=self.agent, - data=body_producer, - headers=headers, - **self._extra_treq_args - ) - request_deferred = timeout_deferred( - request_deferred, - 60, - self.hs.get_reactor(), - cancelled_to_request_timed_out_error, - ) - response = yield make_deferred_yieldable(request_deferred) - - incoming_responses_counter.labels(method, response.code).inc() - logger.info( - "Received response to %s %s: %s", method, redact_uri(uri), response.code - ) - return response - except Exception as e: - incoming_responses_counter.labels(method, "ERR").inc() - logger.info( - "Error sending request to %s %s: %s %s", - method, - redact_uri(uri), - type(e).__name__, - e.args[0], - ) - raise + with start_active_span( + "outgoing-client-request", + tags={ + tags.SPAN_KIND: tags.SPAN_KIND_RPC_CLIENT, + tags.HTTP_METHOD: method, + tags.HTTP_URL: uri, + }, + finish_on_close=True, + ): + try: + body_producer = None + if data is not None: + body_producer = QuieterFileBodyProducer(BytesIO(data)) + + request_deferred = treq.request( + method, + uri, + agent=self.agent, + data=body_producer, + headers=headers, + **self._extra_treq_args + ) + request_deferred = timeout_deferred( + request_deferred, + 60, + self.hs.get_reactor(), + cancelled_to_request_timed_out_error, + ) + response = yield make_deferred_yieldable(request_deferred) + + incoming_responses_counter.labels(method, response.code).inc() + logger.info( + "Received response to %s %s: %s", + method, + redact_uri(uri), + response.code, + ) + return response + except Exception as e: + incoming_responses_counter.labels(method, "ERR").inc() + logger.info( + "Error sending request to %s %s: %s %s", + method, + redact_uri(uri), + type(e).__name__, + e.args[0], + ) + set_tag(tags.ERROR, True) + set_tag("error_reason", e.args[0]) + raise @defer.inlineCallbacks def post_urlencoded_get_json(self, uri, args={}, headers=None): diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 4326e98a28..3f7c93ffcb 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -345,7 +345,6 @@ class MatrixFederationHttpClient(object): else: query_bytes = b"" - # Retreive current span scope = start_active_span( "outgoing-federation-request", tags={ diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index afc9a8ff29..03560c1f0e 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -28,7 +28,11 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) -from synapse.logging.opentracing import inject_active_span_byte_dict, trace_servlet +from synapse.logging.opentracing import ( + inject_active_span_byte_dict, + trace, + trace_servlet, 
+) from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string @@ -129,6 +133,7 @@ class ReplicationEndpoint(object): client = hs.get_simple_http_client() + @trace(opname="outgoing_replication_request") @defer.inlineCallbacks def send_request(**kwargs): data = yield cls._serialize_payload(**kwargs) -- cgit 1.4.1 From e47af0f086839c5d22a0de87a32a49386abef8df Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 5 Sep 2019 17:03:14 -0400 Subject: fix test --- tests/handlers/test_e2e_keys.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 316dd6259d..7a59ec5085 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -265,7 +265,9 @@ class E2eKeysHandlerTestCase(unittest.TestCase): device_key_1["signatures"][local_user]["ed25519:abc"] = "base64+signature" device_key_2["signatures"][local_user]["ed25519:def"] = "base64+signature" - devices = yield self.handler.query_devices({"device_keys": {local_user: []}}, 0) + devices = yield self.handler.query_devices( + {"device_keys": {local_user: []}}, 0, 0 + ) del devices["device_keys"][local_user]["abc"]["unsigned"] del devices["device_keys"][local_user]["def"]["unsigned"] self.assertDictEqual(devices["device_keys"][local_user]["abc"], device_key_1) -- cgit 1.4.1 From 369462da7488772ea6d2fdd076ff355bc09db28c Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 5 Sep 2019 17:03:31 -0400 Subject: avoid modifying input parameter --- synapse/handlers/e2e_keys.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index d5d6e6e027..2c21cb9828 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -629,9 +629,9 @@ class E2eKeysHandler(object): # split between checking signatures for own user and signatures for # other users, since we verify them with different keys - if user_id in signatures: - self_signatures = signatures[user_id] - del signatures[user_id] + self_signatures = signatures.get(user_id, {}) + other_signatures = {k: v for k, v in signatures.items() if k != user_id} + if self_signatures: self_device_ids = list(self_signatures.keys()) try: # get our self-signing key to verify the signatures @@ -766,9 +766,9 @@ class E2eKeysHandler(object): } signed_users = [] # what user have been signed, for notifying - if len(signatures): - # if signatures isn't empty, then we have signatures for other - # users. These signatures will be signed by the user signing key + if other_signatures: + # now check non-self signatures. 
These signatures will be signed + # by the user-signing key try: # get our user-signing key to verify the signatures @@ -776,7 +776,7 @@ class E2eKeysHandler(object): user_id, "user_signing" ) - for user, devicemap in signatures.items(): + for user, devicemap in other_signatures.items(): device_id = None try: # get the user's master key, to make sure it matches -- cgit 1.4.1 From d8517da85b3057e186087de146f546de52d7e206 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Sep 2019 10:07:12 +0100 Subject: Don't assume there is a 'self' arg in @trace decorator --- synapse/logging/opentracing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 8c574ddd28..7246253018 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -702,15 +702,15 @@ def trace(func=None, opname=None): _opname = opname if opname else func.__name__ @wraps(func) - def _trace_inner(self, *args, **kwargs): + def _trace_inner(*args, **kwargs): if opentracing is None: - return func(self, *args, **kwargs) + return func(*args, **kwargs) scope = start_active_span(_opname) scope.__enter__() try: - result = func(self, *args, **kwargs) + result = func(*args, **kwargs) if isinstance(result, defer.Deferred): def call_back(result): @@ -750,13 +750,13 @@ def tag_args(func): return func @wraps(func) - def _tag_args_inner(self, *args, **kwargs): + def _tag_args_inner(*args, **kwargs): argspec = inspect.getargspec(func) for i, arg in enumerate(argspec.args[1:]): set_tag("ARG_" + arg, args[i]) set_tag("args", args[len(argspec.args) :]) set_tag("kwargs", kwargs) - return func(self, *args, **kwargs) + return func(*args, **kwargs) return _tag_args_inner -- cgit 1.4.1 From 4bc6b7130d60005d21ce9be16682c13e90447430 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Sep 2019 10:13:10 +0100 Subject: Newsfile --- changelog.d/5991.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5991.bugfix diff --git a/changelog.d/5991.bugfix b/changelog.d/5991.bugfix new file mode 100644 index 0000000000..b5ba2f9e46 --- /dev/null +++ b/changelog.d/5991.bugfix @@ -0,0 +1 @@ +Fix broken replication requests when opentracing is enabled. -- cgit 1.4.1 From e5baf80237b1ac2dbaac43b623ab75d1b5c8185a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Sep 2019 10:53:05 +0100 Subject: Update changelog --- changelog.d/5991.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5991.bugfix b/changelog.d/5991.bugfix index b5ba2f9e46..5c3597cb53 100644 --- a/changelog.d/5991.bugfix +++ b/changelog.d/5991.bugfix @@ -1 +1 @@ -Fix broken replication requests when opentracing is enabled. +Fix invalid references to None while opentracing if the log context slips. -- cgit 1.4.1 From 0c0b82b6d18102694f9ff1c40b94e5dd124c21d8 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:35:28 +0100 Subject: Allow Synapse to send registration emails + choose Synapse or an external server to handle 3pid validation (#5987) This is a combination of a few different PRs, finally all being merged into `develop`: * #5875 * #5876 * #5868 (This one added the `/versions` flag but the flag itself was actually [backed out](https://github.com/matrix-org/synapse/commit/891afb57cbdf9867f2848341b29c75d6f35eef5a#diff-e591d42d30690ffb79f63bb726200891) in #5969. 
What's left is just giving /versions access to the config file, which could be useful in the future) * #5835 * #5969 * #5940 Clients should not actually use the new registration functionality until https://github.com/matrix-org/synapse/pull/5972 is merged. UPGRADE.rst, changelog entries and config file changes should all be reviewed closely before this PR is merged. --- UPGRADE.rst | 63 ++++++ changelog.d/5835.feature | 1 + changelog.d/5868.feature | 1 + changelog.d/5875.misc | 1 + changelog.d/5876.feature | 1 + changelog.d/5940.feature | 1 + changelog.d/5969.feature | 1 + contrib/cmdclient/console.py | 2 + docs/sample_config.yaml | 56 +++-- synapse/app/client_reader.py | 2 +- synapse/config/emailconfig.py | 122 +++++++--- synapse/config/registration.py | 36 +++ synapse/handlers/account_validity.py | 12 +- synapse/handlers/auth.py | 34 +-- synapse/handlers/identity.py | 178 +++++++++++---- synapse/push/mailer.py | 83 +++++-- synapse/push/pusher.py | 17 +- synapse/res/templates/password_reset.html | 2 +- synapse/res/templates/password_reset.txt | 4 +- synapse/res/templates/password_reset_failure.html | 4 +- synapse/res/templates/registration.html | 11 + synapse/res/templates/registration.txt | 10 + synapse/res/templates/registration_failure.html | 6 + synapse/res/templates/registration_success.html | 6 + synapse/rest/__init__.py | 2 +- synapse/rest/client/v2_alpha/_base.py | 2 + synapse/rest/client/v2_alpha/account.py | 260 ++++++++++------------ synapse/rest/client/v2_alpha/register.py | 196 ++++++++++++++-- synapse/rest/client/versions.py | 8 +- 29 files changed, 820 insertions(+), 302 deletions(-) create mode 100644 changelog.d/5835.feature create mode 100644 changelog.d/5868.feature create mode 100644 changelog.d/5875.misc create mode 100644 changelog.d/5876.feature create mode 100644 changelog.d/5940.feature create mode 100644 changelog.d/5969.feature create mode 100644 synapse/res/templates/registration.html create mode 100644 synapse/res/templates/registration.txt create mode 100644 synapse/res/templates/registration_failure.html create mode 100644 synapse/res/templates/registration_success.html diff --git a/UPGRADE.rst b/UPGRADE.rst index cf228c7c52..dddcd75fda 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -49,6 +49,56 @@ returned by the Client-Server API: # configured on port 443. curl -kv https:///_matrix/client/versions 2>&1 | grep "Server:" +Upgrading to v1.4.0 +=================== + +Config options +-------------- + +**Note: Registration by email address or phone number will not work in this release unless +some config options are changed from their defaults.** + +This is due to Synapse v1.4.0 now defaulting to sending registration and password reset tokens +itself. This is for security reasons as well as putting less reliance on identity servers. +However, currently Synapse only supports sending emails, and does not have support for +phone-based password reset or account registration. If Synapse is configured to handle these on +its own, phone-based password resets and registration will be disabled. For Synapse to send +emails, the ``email`` block of the config must be filled out. If not, then password resets and +registration via email will be disabled entirely. + +This release also deprecates the ``email.trust_identity_server_for_password_resets`` option and +replaces it with the ``account_threepid_delegates`` dictionary. 
This option defines whether the +homeserver should delegate an external server (typically an `identity server +`_) to handle sending password reset or +registration messages via email and SMS. + +If ``email.trust_identity_server_for_password_resets`` is set to ``true``, and +``account_threepid_delegates.email`` is not set, then the first entry in +``trusted_third_party_id_servers`` will be used as the account threepid delegate for email. +This is to ensure compatibility with existing Synapse installs that set up external server +handling for these tasks before v1.4.0. If ``email.trust_identity_server_for_password_resets`` +is ``true`` and no trusted identity server domains are configured, Synapse will throw an error. + +If ``email.trust_identity_server_for_password_resets`` is ``false`` or absent and a threepid +type in ``account_threepid_delegates`` is not set to a domain, then Synapse will attempt to +send password reset and registration messages for that type. + +Email templates +--------------- + +If you have configured a custom template directory with the ``email.template_dir`` option, be +aware that there are new templates regarding registration. ``registration.html`` and +``registration.txt`` have been added and contain the content that is sent to a client upon +registering via an email address. + +``registration_success.html`` and ``registration_failure.html`` are also new HTML templates +that will be shown to the user when they click the link in their registration emai , either +showing them a success or failure page (assuming a redirect URL is not configured). + +Synapse will expect these files to exist inside the configured template directory. To view the +default templates, see `synapse/res/templates +`_. + Upgrading to v1.2.0 =================== @@ -132,6 +182,19 @@ server for password resets, set ``trust_identity_server_for_password_resets`` to See the `sample configuration file `_ for more details on these settings. +New email templates +--------------- +Some new templates have been added to the default template directory for the purpose of the +homeserver sending its own password reset emails. If you have configured a custom +``template_dir`` in your Synapse config, these files will need to be added. + +``password_reset.html`` and ``password_reset.txt`` are HTML and plain text templates +respectively that contain the contents of what will be emailed to the user upon attempting to +reset their password via email. ``password_reset_success.html`` and +``password_reset_failure.html`` are HTML files that the content of which (assuming no redirect +URL is set) will be shown to the user after they attempt to click the link in the email sent +to them. + Upgrading to v0.99.0 ==================== diff --git a/changelog.d/5835.feature b/changelog.d/5835.feature new file mode 100644 index 0000000000..3e8bf5068d --- /dev/null +++ b/changelog.d/5835.feature @@ -0,0 +1 @@ +Add the ability to send registration emails from the homeserver rather than delegating to an identity server. diff --git a/changelog.d/5868.feature b/changelog.d/5868.feature new file mode 100644 index 0000000000..69605c1ae1 --- /dev/null +++ b/changelog.d/5868.feature @@ -0,0 +1 @@ +Add `m.require_identity_server` key to `/versions`'s `unstable_features` section. \ No newline at end of file diff --git a/changelog.d/5875.misc b/changelog.d/5875.misc new file mode 100644 index 0000000000..e188c28d2f --- /dev/null +++ b/changelog.d/5875.misc @@ -0,0 +1 @@ +Deprecate the `trusted_third_party_id_servers` option. 
\ No newline at end of file diff --git a/changelog.d/5876.feature b/changelog.d/5876.feature new file mode 100644 index 0000000000..df88193fbd --- /dev/null +++ b/changelog.d/5876.feature @@ -0,0 +1 @@ +Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`. \ No newline at end of file diff --git a/changelog.d/5940.feature b/changelog.d/5940.feature new file mode 100644 index 0000000000..5b69b97fe7 --- /dev/null +++ b/changelog.d/5940.feature @@ -0,0 +1 @@ +Add the ability to send registration emails from the homeserver rather than delegating to an identity server. \ No newline at end of file diff --git a/changelog.d/5969.feature b/changelog.d/5969.feature new file mode 100644 index 0000000000..cf603fa0c6 --- /dev/null +++ b/changelog.d/5969.feature @@ -0,0 +1 @@ +Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`. diff --git a/contrib/cmdclient/console.py b/contrib/cmdclient/console.py index 899c650b0c..48da410d94 100755 --- a/contrib/cmdclient/console.py +++ b/contrib/cmdclient/console.py @@ -37,6 +37,8 @@ from signedjson.sign import verify_signed_json, SignatureVerifyException CONFIG_JSON = "cmdclient_config.json" +# TODO: The concept of trusted identity servers has been deprecated. This option and checks +# should be removed TRUSTED_ID_SERVERS = ["localhost:8001"] diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 43969bbb70..186cdbedd2 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -891,10 +891,42 @@ uploads_path: "DATADIR/uploads" # Also defines the ID server which will be called when an account is # deactivated (one will be picked arbitrarily). # +# Note: This option is deprecated. Since v0.99.4, Synapse has tracked which identity +# server a 3PID has been bound to. For 3PIDs bound before then, Synapse runs a +# background migration script, informing itself that the identity server all of its +# 3PIDs have been bound to is likely one of the below. +# +# As of Synapse v1.4.0, all other functionality of this option has been deprecated, and +# it is now solely used for the purposes of the background migration script, and can be +# removed once it has run. #trusted_third_party_id_servers: # - matrix.org # - vector.im +# Handle threepid (email/phone etc) registration and password resets through a set of +# *trusted* identity servers. Note that this allows the configured identity server to +# reset passwords for accounts! +# +# Be aware that if `email` is not set, and SMTP options have not been +# configured in the email config block, registration and user password resets via +# email will be globally disabled. +# +# Additionally, if `msisdn` is not set, registration and password resets via msisdn +# will be disabled regardless. This is due to Synapse currently not supporting any +# method of sending SMS messages on its own. +# +# To enable using an identity server for operations regarding a particular third-party +# identifier type, set the value to the URL of that identity server as shown in the +# examples below. 
+#
+# Servers handling these requests must answer the `/requestToken` endpoints defined
+# by the Matrix Identity Service API specification:
+# https://matrix.org/docs/spec/identity_service/latest
+#
+account_threepid_delegates:
+    #email: https://example.com     # Delegate email sending to matrix.org
+    #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
+
 # Users who register on this homeserver will automatically be joined
 # to these rooms
 #
@@ -1164,19 +1196,6 @@ password_config:
 # #
 # # riot_base_url: "http://localhost/riot"
 #
-# # Enable sending password reset emails via the configured, trusted
-# # identity servers
-# #
-# # IMPORTANT! This will give a malicious or overtaken identity server
-# # the ability to reset passwords for your users! Make absolutely sure
-# # that you want to do this! It is strongly recommended that password
-# # reset emails be sent by the homeserver instead
-# #
-# # If this option is set to false and SMTP options have not been
-# # configured, resetting user passwords via email will be disabled
-# #
-# #trust_identity_server_for_password_resets: false
-#
 # # Configure the time that a validation email or text message code
 # # will expire after sending
 # #
@@ -1208,11 +1227,22 @@ password_config:
 # #password_reset_template_html: password_reset.html
 # #password_reset_template_text: password_reset.txt
 #
+# # Templates for registration emails sent by the homeserver
+# #
+# #registration_template_html: registration.html
+# #registration_template_text: registration.txt
+#
 # # Templates for password reset success and failure pages that a user
 # # will see after attempting to reset their password
 # #
 # #password_reset_template_success_html: password_reset_success.html
 # #password_reset_template_failure_html: password_reset_failure.html
+#
+# # Templates for registration success and failure pages that a user
+# # will see after attempting to register using an email or phone
+# #
+# #registration_template_success_html: registration_success.html
+# #registration_template_failure_html: registration_failure.html

 #password_providers:
diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py
index 86193d35a8..dbcc414c42 100644
--- a/synapse/app/client_reader.py
+++ b/synapse/app/client_reader.py
@@ -119,7 +119,7 @@ class ClientReaderServer(HomeServer):
             KeyChangesServlet(self).register(resource)
             VoipRestServlet(self).register(resource)
             PushRuleRestServlet(self).register(resource)
-            VersionsRestServlet().register(resource)
+            VersionsRestServlet(self).register(resource)
             resources.update({"/_matrix/client": resource})
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index f83c05df44..e5de768b0c 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 # This file can't be called email.py because if it is, we cannot:
 import email.utils
 import os
+from enum import Enum
 import pkg_resources
@@ -74,19 +75,48 @@ class EmailConfig(Config):
             "renew_at"
         )
-        email_trust_identity_server_for_password_resets = email_config.get(
-            "trust_identity_server_for_password_resets", False
+        self.threepid_behaviour_email = (
+            # Have Synapse handle the email sending if account_threepid_delegates.email
+            # is not defined
+            # msisdn is currently always remote while Synapse does not support any method of
+            # sending SMS messages
+            ThreepidBehaviour.REMOTE
+            if self.account_threepid_delegate_email
+            else ThreepidBehaviour.LOCAL
         )
-        self.email_password_reset_behaviour = (
-            "remote" if email_trust_identity_server_for_password_resets else "local"
-        )
-        self.password_resets_were_disabled_due_to_email_config = False
-        if self.email_password_reset_behaviour == "local" and email_config == {}:
+
+        # Prior to Synapse v1.4.0, there was another option that defined whether Synapse would
+        # use an identity server to send password reset tokens on its behalf. We now warn the
+        # user if they have this set and tell them to use the updated option, while using a
+        # default identity server in the process.
+        self.using_identity_server_from_trusted_list = False
+        if (
+            not self.account_threepid_delegate_email
+            and config.get("trust_identity_server_for_password_resets", False) is True
+        ):
+            # Use the first entry in self.trusted_third_party_id_servers instead
+            if self.trusted_third_party_id_servers:
+                # XXX: It's a little confusing that account_threepid_delegate_email is modified
+                # both in RegistrationConfig and here. We should factor this bit out
+                self.account_threepid_delegate_email = self.trusted_third_party_id_servers[
+                    0
+                ]
+                self.using_identity_server_from_trusted_list = True
+            else:
+                raise ConfigError(
+                    "Attempted to use an identity server from"
+                    '"trusted_third_party_id_servers" but it is empty.'
+                )
+
+        self.local_threepid_handling_disabled_due_to_email_config = False
+        if (
+            self.threepid_behaviour_email == ThreepidBehaviour.LOCAL
+            and email_config == {}
+        ):
             # We cannot warn the user this has happened here
             # Instead do so when a user attempts to reset their password
-            self.password_resets_were_disabled_due_to_email_config = True
+            self.local_threepid_handling_disabled_due_to_email_config = True
-            self.email_password_reset_behaviour = "off"
+            self.threepid_behaviour_email = ThreepidBehaviour.OFF
         # Get lifetime of a validation token in milliseconds
         self.email_validation_token_lifetime = self.parse_duration(
@@ -96,7 +126,7 @@
         if (
             self.email_enable_notifs
             or account_validity_renewal_enabled
-            or self.email_password_reset_behaviour == "local"
+            or self.threepid_behaviour_email == ThreepidBehaviour.LOCAL
         ):
             # make sure we can import the required deps
             import jinja2
@@ -106,7 +136,7 @@
             jinja2
             bleach
-        if self.email_password_reset_behaviour == "local":
+        if self.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
             required = ["smtp_host", "smtp_port", "notif_from"]
             missing = []
@@ -125,28 +155,45 @@
                     % (", ".join(missing),)
                 )
-            # Templates for password reset emails
+            # These email templates have placeholders in them, and thus must be
+            # parsed using a templating engine during a request
             self.email_password_reset_template_html = email_config.get(
                 "password_reset_template_html", "password_reset.html"
             )
             self.email_password_reset_template_text = email_config.get(
                 "password_reset_template_text", "password_reset.txt"
             )
+            self.email_registration_template_html = email_config.get(
+                "registration_template_html", "registration.html"
+            )
+            self.email_registration_template_text = email_config.get(
+                "registration_template_text", "registration.txt"
+            )
             self.email_password_reset_template_failure_html = email_config.get(
                 "password_reset_template_failure_html", "password_reset_failure.html"
             )
-            # This template does not support any replaceable variables, so we will
-            # read it from the disk once during setup
+            self.email_registration_template_failure_html = email_config.get(
+                "registration_template_failure_html", "registration_failure.html"
+            )
+
+            # These templates do not support any placeholder
variables, so we + # will read them from disk once during setup email_password_reset_template_success_html = email_config.get( "password_reset_template_success_html", "password_reset_success.html" ) + email_registration_template_success_html = email_config.get( + "registration_template_success_html", "registration_success.html" + ) # Check templates exist for f in [ self.email_password_reset_template_html, self.email_password_reset_template_text, + self.email_registration_template_html, + self.email_registration_template_text, self.email_password_reset_template_failure_html, email_password_reset_template_success_html, + email_registration_template_success_html, ]: p = os.path.join(self.email_template_dir, f) if not os.path.isfile(p): @@ -156,9 +203,15 @@ class EmailConfig(Config): filepath = os.path.join( self.email_template_dir, email_password_reset_template_success_html ) - self.email_password_reset_template_success_html_content = self.read_file( + self.email_password_reset_template_success_html = self.read_file( filepath, "email.password_reset_template_success_html" ) + filepath = os.path.join( + self.email_template_dir, email_registration_template_success_html + ) + self.email_registration_template_success_html_content = self.read_file( + filepath, "email.registration_template_success_html" + ) if self.email_enable_notifs: required = [ @@ -239,19 +292,6 @@ class EmailConfig(Config): # # # riot_base_url: "http://localhost/riot" # - # # Enable sending password reset emails via the configured, trusted - # # identity servers - # # - # # IMPORTANT! This will give a malicious or overtaken identity server - # # the ability to reset passwords for your users! Make absolutely sure - # # that you want to do this! It is strongly recommended that password - # # reset emails be sent by the homeserver instead - # # - # # If this option is set to false and SMTP options have not been - # # configured, resetting user passwords via email will be disabled - # # - # #trust_identity_server_for_password_resets: false - # # # Configure the time that a validation email or text message code # # will expire after sending # # @@ -283,9 +323,35 @@ class EmailConfig(Config): # #password_reset_template_html: password_reset.html # #password_reset_template_text: password_reset.txt # + # # Templates for registration emails sent by the homeserver + # # + # #registration_template_html: registration.html + # #registration_template_text: registration.txt + # # # Templates for password reset success and failure pages that a user # # will see after attempting to reset their password # # # #password_reset_template_success_html: password_reset_success.html # #password_reset_template_failure_html: password_reset_failure.html + # + # # Templates for registration success and failure pages that a user + # # will see after attempting to register using an email or phone + # # + # #registration_template_success_html: registration_success.html + # #registration_template_failure_html: registration_failure.html """ + + +class ThreepidBehaviour(Enum): + """ + Enum to define the behaviour of Synapse with regards to when it contacts an identity + server for 3pid registration and password resets + + REMOTE = use an external server to send tokens + LOCAL = send tokens ourselves + OFF = disable registration via 3pid and password resets + """ + + REMOTE = "remote" + LOCAL = "local" + OFF = "off" diff --git a/synapse/config/registration.py b/synapse/config/registration.py index e2bee3c116..9548560edb 100644 --- a/synapse/config/registration.py 
+++ b/synapse/config/registration.py @@ -99,6 +99,10 @@ class RegistrationConfig(Config): self.trusted_third_party_id_servers = config.get( "trusted_third_party_id_servers", ["matrix.org", "vector.im"] ) + account_threepid_delegates = config.get("account_threepid_delegates") or {} + self.account_threepid_delegate_email = account_threepid_delegates.get("email") + self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn") + self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) @@ -257,10 +261,42 @@ class RegistrationConfig(Config): # Also defines the ID server which will be called when an account is # deactivated (one will be picked arbitrarily). # + # Note: This option is deprecated. Since v0.99.4, Synapse has tracked which identity + # server a 3PID has been bound to. For 3PIDs bound before then, Synapse runs a + # background migration script, informing itself that the identity server all of its + # 3PIDs have been bound to is likely one of the below. + # + # As of Synapse v1.4.0, all other functionality of this option has been deprecated, and + # it is now solely used for the purposes of the background migration script, and can be + # removed once it has run. #trusted_third_party_id_servers: # - matrix.org # - vector.im + # Handle threepid (email/phone etc) registration and password resets through a set of + # *trusted* identity servers. Note that this allows the configured identity server to + # reset passwords for accounts! + # + # Be aware that if `email` is not set, and SMTP options have not been + # configured in the email config block, registration and user password resets via + # email will be globally disabled. + # + # Additionally, if `msisdn` is not set, registration and password resets via msisdn + # will be disabled regardless. This is due to Synapse currently not supporting any + # method of sending SMS messages on its own. + # + # To enable using an identity server for operations regarding a particular third-party + # identifier type, set the value to the URL of that identity server as shown in the + # examples below. 
+ # + # Servers handling these requests must answer the `/requestToken` endpoints defined + # by the Matrix Identity Service API specification: + # https://matrix.org/docs/spec/identity_service/latest + # + account_threepid_delegates: + #email: https://example.com # Delegate email sending to example.com + #msisdn: http://localhost:8090 # Delegate SMS sending to this local process + # Users who register on this homeserver will automatically be joined # to these rooms # diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index 34574f1a12..d04e0fe576 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -38,6 +38,7 @@ logger = logging.getLogger(__name__) class AccountValidityHandler(object): def __init__(self, hs): self.hs = hs + self.config = hs.config self.store = self.hs.get_datastore() self.sendmail = self.hs.get_sendmail() self.clock = self.hs.get_clock() @@ -62,9 +63,14 @@ class AccountValidityHandler(object): self._raw_from = email.utils.parseaddr(self._from_string)[1] self._template_html, self._template_text = load_jinja2_templates( - config=self.hs.config, - template_html_name=self.hs.config.email_expiry_template_html, - template_text_name=self.hs.config.email_expiry_template_text, + self.config.email_template_dir, + [ + self.config.email_expiry_template_html, + self.config.email_expiry_template_text, + ], + apply_format_ts_filter=True, + apply_mxc_to_http_filter=True, + public_baseurl=self.config.public_baseurl, ) # Check the renewal emails to send and send them every 30min. diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f844409d21..d0c0142740 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -38,6 +38,7 @@ from synapse.api.errors import ( UserDeactivatedError, ) from synapse.api.ratelimiting import Ratelimiter +from synapse.config.emailconfig import ThreepidBehaviour from synapse.logging.context import defer_to_thread from synapse.module_api import ModuleApi from synapse.types import UserID @@ -158,7 +159,7 @@ class AuthHandler(BaseHandler): return params @defer.inlineCallbacks - def check_auth(self, flows, clientdict, clientip, password_servlet=False): + def check_auth(self, flows, clientdict, clientip): """ Takes a dictionary sent by the client in the login / registration protocol and handles the User-Interactive Auth flow. @@ -182,16 +183,6 @@ class AuthHandler(BaseHandler): clientip (str): The IP address of the client. - password_servlet (bool): Whether the request originated from - PasswordRestServlet. - XXX: This is a temporary hack to distinguish between checking - for threepid validations locally (in the case of password - resets) and using the identity server (in the case of binding - a 3PID during registration). Once we start using the - homeserver for both tasks, this distinction will no longer be - necessary. - - Returns: defer.Deferred[dict, dict, str]: a deferred tuple of (creds, params, session_id).
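The removed password_servlet flag is superseded by the ThreepidBehaviour enum that emailconfig.py now derives from the options above. A rough, self-contained sketch of that resolution logic, assuming only names this patch introduces (the standalone helper function itself is hypothetical):

```python
# Hedged sketch of the resolution performed in EmailConfig above; the
# wrapper function resolve_threepid_behaviour is invented for illustration.
from enum import Enum


class ThreepidBehaviour(Enum):
    REMOTE = "remote"  # delegate /requestToken handling to an identity server
    LOCAL = "local"    # send validation emails ourselves via SMTP
    OFF = "off"        # neither configured: the feature is disabled


def resolve_threepid_behaviour(account_threepid_delegate_email, email_config):
    if account_threepid_delegate_email:
        # An account_threepid_delegates.email entry wins: proxy to it
        return ThreepidBehaviour.REMOTE
    if email_config:
        # Otherwise fall back to Synapse's own SMTP settings
        return ThreepidBehaviour.LOCAL
    # No delegate and no SMTP config: email-based 3pid validation is off
    return ThreepidBehaviour.OFF


# e.g. resolve_threepid_behaviour(None, {}) is ThreepidBehaviour.OFF
```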
@@ -247,9 +238,7 @@ class AuthHandler(BaseHandler): if "type" in authdict: login_type = authdict["type"] try: - result = yield self._check_auth_dict( - authdict, clientip, password_servlet=password_servlet - ) + result = yield self._check_auth_dict(authdict, clientip) if result: creds[login_type] = result self._save_session(session) @@ -356,7 +345,7 @@ class AuthHandler(BaseHandler): return sess.setdefault("serverdict", {}).get(key, default) @defer.inlineCallbacks - def _check_auth_dict(self, authdict, clientip, password_servlet=False): + def _check_auth_dict(self, authdict, clientip): """Attempt to validate the auth dict provided by a client Args: @@ -374,11 +363,7 @@ class AuthHandler(BaseHandler): login_type = authdict["type"] checker = self.checkers.get(login_type) if checker is not None: - # XXX: Temporary workaround for having Synapse handle password resets - # See AuthHandler.check_auth for further details - res = yield checker( - authdict, clientip=clientip, password_servlet=password_servlet - ) + res = yield checker(authdict, clientip=clientip) return res # build a v1-login-style dict out of the authdict and fall back to the @@ -449,7 +434,7 @@ class AuthHandler(BaseHandler): return defer.succeed(True) @defer.inlineCallbacks - def _check_threepid(self, medium, authdict, password_servlet=False, **kwargs): + def _check_threepid(self, medium, authdict, **kwargs): if "threepid_creds" not in authdict: raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) @@ -458,12 +443,9 @@ class AuthHandler(BaseHandler): identity_handler = self.hs.get_handlers().identity_handler logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) - if ( - not password_servlet - or self.hs.config.email_password_reset_behaviour == "remote" - ): + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: threepid = yield identity_handler.threepid_from_creds(threepid_creds) - elif self.hs.config.email_password_reset_behaviour == "local": + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: row = yield self.store.get_threepid_validation_session( medium, threepid_creds["client_secret"], diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 583b612dd9..71b5a87392 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -29,6 +29,7 @@ from synapse.api.errors import ( HttpResponseException, SynapseError, ) +from synapse.util.stringutils import random_string from ._base import BaseHandler @@ -41,25 +42,7 @@ class IdentityHandler(BaseHandler): self.http_client = hs.get_simple_http_client() self.federation_http_client = hs.get_http_client() - - self.trusted_id_servers = set(hs.config.trusted_third_party_id_servers) - self.trust_any_id_server_just_for_testing_do_not_use = ( - hs.config.use_insecure_ssl_client_just_for_testing_do_not_use - ) - - def _should_trust_id_server(self, id_server): - if id_server not in self.trusted_id_servers: - if self.trust_any_id_server_just_for_testing_do_not_use: - logger.warn( - "Trusting untrustworthy ID server %r even though it isn't" - " in the trusted id list for testing because" - " 'use_insecure_ssl_client_just_for_testing_do_not_use'" - " is set in the config", - id_server, - ) - else: - return False - return True + self.hs = hs def _extract_items_from_creds_dict(self, creds): """ @@ -132,13 +115,6 @@ class IdentityHandler(BaseHandler): "/_matrix/identity/api/v1/3pid/getValidated3pid", ) - if not self._should_trust_id_server(id_server): - logger.warn( - "%s is not a 
trusted ID server: rejecting 3pid " + "credentials", - id_server, - ) - return None - try: data = yield self.http_client.get_json(url, query_params) return data if "medium" in data else None @@ -305,28 +281,122 @@ class IdentityHandler(BaseHandler): return changed + @defer.inlineCallbacks + def send_threepid_validation( + self, + email_address, + client_secret, + send_attempt, + send_email_func, + next_link=None, + ): + """Send a threepid validation email for password reset or + registration purposes + + Args: + email_address (str): The user's email address + client_secret (str): The provided client secret + send_attempt (int): Which send attempt this is + send_email_func (func): A function that takes an email address, token, + client_secret and session_id, sends an email + and returns a Deferred. + next_link (str|None): The URL to redirect the user to after validation + + Returns: + The new session_id upon success + + Raises: + SynapseError if an error occurred when sending the email + """ + # Check that this email/client_secret/send_attempt combo is new or + # greater than what we've seen previously + session = yield self.store.get_threepid_validation_session( + "email", client_secret, address=email_address, validated=False + ) + + # Check to see if a session already exists and that it is not yet + # marked as validated + if session and session.get("validated_at") is None: + session_id = session["session_id"] + last_send_attempt = session["last_send_attempt"] + + # Check that the send_attempt is higher than previous attempts + if send_attempt <= last_send_attempt: + # If not, just return a success without sending an email + return session_id + else: + # A non-validated session does not exist yet. + # Generate a session id + session_id = random_string(16) + + # Generate a new validation token + token = random_string(32) + + # Send the mail with the link containing the token, client_secret + # and session_id + try: + yield send_email_func(email_address, token, client_secret, session_id) + except Exception: + logger.exception( + "Error sending threepid validation email to %s", email_address + ) + raise SynapseError(500, "An error was encountered when sending the email") + + token_expires = ( + self.hs.clock.time_msec() + self.hs.config.email_validation_token_lifetime + ) + + yield self.store.start_or_continue_validation_session( + "email", + email_address, + session_id, + client_secret, + send_attempt, + next_link, + token, + token_expires, + ) + + return session_id + @defer.inlineCallbacks def requestEmailToken( self, id_server, email, client_secret, send_attempt, next_link=None ): - if not self._should_trust_id_server(id_server): - raise SynapseError( - 400, "Untrusted ID server '%s'" % id_server, Codes.SERVER_NOT_TRUSTED - ) + """ + Request that an external server send an email on our behalf for the purposes of threepid + validation.
+ + Args: + id_server (str): The identity server to proxy to + email (str): The email to send the message to + client_secret (str): The unique client_secret sent by the user + send_attempt (int): Which attempt this is + next_link: A link to redirect the user to once they submit the token + Returns: + The json response body from the server + """ params = { "email": email, "client_secret": client_secret, "send_attempt": send_attempt, } if next_link: - params.update({"next_link": next_link}) + params["next_link"] = next_link + + if self.hs.config.using_identity_server_from_trusted_list: + # Warn that a deprecated config option is in use + logger.warn( + 'The config option "trust_identity_server_for_password_resets" ' + 'has been replaced by "account_threepid_delegate". ' + "Please consult the sample config at docs/sample_config.yaml for " + "details and update your config file." + ) try: data = yield self.http_client.post_json_get_json( - "https://%s%s" - % (id_server, "/_matrix/identity/api/v1/validate/email/requestToken"), + id_server + "/_matrix/identity/api/v1/validate/email/requestToken", params, ) return data @@ -336,25 +406,49 @@ @defer.inlineCallbacks def requestMsisdnToken( - self, id_server, country, phone_number, client_secret, send_attempt, **kwargs + self, + id_server, + country, + phone_number, + client_secret, + send_attempt, + next_link=None, ): - if not self._should_trust_id_server(id_server): - raise SynapseError( - 400, "Untrusted ID server '%s'" % id_server, Codes.SERVER_NOT_TRUSTED - ) + """ + Request that an external server send an SMS message on our behalf for the purposes of + threepid validation. + Args: + id_server (str): The identity server to proxy to + country (str): The country code of the phone number + phone_number (str): The number to send the message to + client_secret (str): The unique client_secret sent by the user + send_attempt (int): Which attempt this is + next_link: A link to redirect the user to once they submit the token + Returns: + The json response body from the server + """ params = { "country": country, "phone_number": phone_number, "client_secret": client_secret, "send_attempt": send_attempt, } - params.update(kwargs) + if next_link: + params["next_link"] = next_link + + if self.hs.config.using_identity_server_from_trusted_list: + # Warn that a deprecated config option is in use + logger.warn( + 'The config option "trust_identity_server_for_password_resets" ' + 'has been replaced by "account_threepid_delegate". ' + "Please consult the sample config at docs/sample_config.yaml for " + "details and update your config file."
+ ) try: data = yield self.http_client.post_json_get_json( - "https://%s%s" - % (id_server, "/_matrix/identity/api/v1/validate/msisdn/requestToken"), + id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken", params, ) return data diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 4245ce26f3..3dfd527849 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -131,14 +131,11 @@ class Mailer(object): email_address (str): Email address we're sending the password reset to token (str): Unique token generated by the server to verify - password reset email was received + the email was received client_secret (str): Unique token generated by the client to group together multiple email sending attempts sid (str): The generated session ID """ - if email.utils.parseaddr(email_address)[1] == "": - raise RuntimeError("Invalid 'to' email address") - link = ( self.hs.config.public_baseurl + "_matrix/client/unstable/password_reset/email/submit_token" @@ -149,7 +146,34 @@ class Mailer(object): yield self.send_email( email_address, - "[%s] Password Reset Email" % self.hs.config.server_name, + "[%s] Password Reset" % self.hs.config.server_name, + template_vars, + ) + + @defer.inlineCallbacks + def send_registration_mail(self, email_address, token, client_secret, sid): + """Send an email with a registration confirmation link to a user + + Args: + email_address (str): Email address we're sending the registration + link to + token (str): Unique token generated by the server to verify + the email was received + client_secret (str): Unique token generated by the client to + group together multiple email sending attempts + sid (str): The generated session ID + """ + link = ( + self.hs.config.public_baseurl + + "_matrix/client/unstable/registration/email/submit_token" + "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid) + ) + + template_vars = {"link": link} + + yield self.send_email( + email_address, + "[%s] Register your Email Address" % self.hs.config.server_name, template_vars, ) @@ -605,25 +629,50 @@ def format_ts_filter(value, format): return time.strftime(format, time.localtime(value / 1000)) -def load_jinja2_templates(config, template_html_name, template_text_name): - """Load the jinja2 email templates from disk +def load_jinja2_templates( + template_dir, + template_filenames, + apply_format_ts_filter=False, + apply_mxc_to_http_filter=False, + public_baseurl=None, +): + """Loads and returns one or more jinja2 templates and applies optional filters + + Args: + template_dir (str): The directory where templates are stored + template_filenames (list[str]): A list of template filenames + apply_format_ts_filter (bool): Whether to apply a template filter that formats + timestamps + apply_mxc_to_http_filter (bool): Whether to apply a template filter that converts + mxc urls to http urls + public_baseurl (str|None): The public baseurl of the server. 
Required for + apply_mxc_to_http_filter to be enabled Returns: - (template_html, template_text) + A list of jinja2 templates corresponding to the given list of filenames, + with order preserved """ - logger.info("loading email templates from '%s'", config.email_template_dir) - loader = jinja2.FileSystemLoader(config.email_template_dir) + logger.info( + "loading email templates %s from '%s'", template_filenames, template_dir + ) + loader = jinja2.FileSystemLoader(template_dir) env = jinja2.Environment(loader=loader) - env.filters["format_ts"] = format_ts_filter - env.filters["mxc_to_http"] = _create_mxc_to_http_filter(config) - template_html = env.get_template(template_html_name) - template_text = env.get_template(template_text_name) + if apply_format_ts_filter: + env.filters["format_ts"] = format_ts_filter + + if apply_mxc_to_http_filter and public_baseurl: + env.filters["mxc_to_http"] = _create_mxc_to_http_filter(public_baseurl) + + templates = [] + for template_filename in template_filenames: + template = env.get_template(template_filename) + templates.append(template) - return template_html, template_text + return templates -def _create_mxc_to_http_filter(config): +def _create_mxc_to_http_filter(public_baseurl): def mxc_to_http_filter(value, width, height, resize_method="crop"): if value[0:6] != "mxc://": return "" @@ -636,7 +685,7 @@ def _create_mxc_to_http_filter(config): params = {"width": width, "height": height, "method": resize_method} return "%s_matrix/media/v1/thumbnail/%s?%s%s" % ( - config.public_baseurl, + public_baseurl, serverAndMediaId, urllib.parse.urlencode(params), fragment or "", diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index a9c64a9c54..f277aeb131 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -35,6 +35,7 @@ except Exception: class PusherFactory(object): def __init__(self, hs): self.hs = hs + self.config = hs.config self.pusher_types = {"http": HttpPusher} @@ -42,12 +43,16 @@ class PusherFactory(object): if hs.config.email_enable_notifs: self.mailers = {} # app_name -> Mailer - templates = load_jinja2_templates( - config=hs.config, - template_html_name=hs.config.email_notif_template_html, - template_text_name=hs.config.email_notif_template_text, + self.notif_template_html, self.notif_template_text = load_jinja2_templates( + self.config.email_template_dir, + [ + self.config.email_notif_template_html, + self.config.email_notif_template_text, + ], + apply_format_ts_filter=True, + apply_mxc_to_http_filter=True, + public_baseurl=self.config.public_baseurl, ) - self.notif_template_html, self.notif_template_text = templates self.pusher_types["email"] = self._create_email_pusher @@ -78,6 +83,6 @@ class PusherFactory(object): if "data" in pusherdict and "brand" in pusherdict["data"]: app_name = pusherdict["data"]["brand"] else: - app_name = self.hs.config.email_app_name + app_name = self.config.email_app_name return app_name diff --git a/synapse/res/templates/password_reset.html b/synapse/res/templates/password_reset.html index 4fa7b36734..a197bf872c 100644 --- a/synapse/res/templates/password_reset.html +++ b/synapse/res/templates/password_reset.html @@ -4,6 +4,6 @@ {{ link }} -
<p>If this was not you, please disregard this email and contact your server administrator. Thank you.</p>
+<p>If this was not you, do not click the link above and instead contact your server administrator. Thank you.</p>
 </body>
 </html>
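The template changes here are loaded through the reworked load_jinja2_templates helper shown above. A hedged usage sketch with the new signature; the directory path and base URL are placeholder assumptions:

```python
# Illustrative only: the template directory and public_baseurl are made up.
from synapse.push.mailer import load_jinja2_templates

template_html, template_text = load_jinja2_templates(
    "/path/to/res/templates",                   # e.g. config.email_template_dir
    ["registration.html", "registration.txt"],  # templates added by this patch
    apply_format_ts_filter=True,
    apply_mxc_to_http_filter=True,
    public_baseurl="https://example.com/",      # required by the mxc_to_http filter
)
# Templates come back in the order requested, so tuple unpacking stays stable.
```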
diff --git a/synapse/res/templates/password_reset.txt b/synapse/res/templates/password_reset.txt index f0deff59a7..6aa6527560 100644 --- a/synapse/res/templates/password_reset.txt +++ b/synapse/res/templates/password_reset.txt @@ -3,5 +3,5 @@ was you, please click the link below to confirm resetting your password: {{ link }} -If this was not you, please disregard this email and contact your server -administrator. Thank you. +If this was not you, DO NOT click the link above and instead contact your +server administrator. Thank you. diff --git a/synapse/res/templates/password_reset_failure.html b/synapse/res/templates/password_reset_failure.html index 0b132cf8db..9e3c4446e3 100644 --- a/synapse/res/templates/password_reset_failure.html +++ b/synapse/res/templates/password_reset_failure.html @@ -1,6 +1,8 @@ -
<p>{{ failure_reason }}. Your password has not been reset.</p>
+<p>The request failed for the following reason: {{ failure_reason }}.</p>
+
+<p>Your password has not been reset.</p>
 </body>
 </html>
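For reference, the mxc_to_http filter that _create_mxc_to_http_filter now builds from a plain public_baseurl (rather than the whole config object) can be sketched standalone as follows; inputs are illustrative and the fragment handling of the real filter is omitted:

```python
# Simplified, self-contained sketch of the mxc_to_http template filter.
import urllib.parse


def mxc_to_http(public_baseurl, value, width, height, resize_method="crop"):
    # Only mxc:// URIs are converted; anything else renders as an empty string.
    if value[0:6] != "mxc://":
        return ""
    server_and_media_id = value[6:]
    params = {"width": width, "height": height, "method": resize_method}
    return "%s_matrix/media/v1/thumbnail/%s?%s" % (
        public_baseurl,  # assumed to end with a trailing slash
        server_and_media_id,
        urllib.parse.urlencode(params),
    )


# mxc_to_http("https://example.com/", "mxc://example.com/abc123", 32, 32)
# -> "https://example.com/_matrix/media/v1/thumbnail/example.com/abc123?width=32&height=32&method=crop"
```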
diff --git a/synapse/res/templates/registration.html b/synapse/res/templates/registration.html new file mode 100644 index 0000000000..16730a527f --- /dev/null +++ b/synapse/res/templates/registration.html @@ -0,0 +1,11 @@
+<html>
+<body>
+<p>You have asked us to register this email with a new Matrix account. If this was you, please click the link below to confirm your email address:</p>
+
+<a href="{{ link }}">Verify Your Email Address</a>
+
+<p>If this was not you, you can safely disregard this email.</p>
+
+<p>Thank you.</p>
+</body>
+</html>
diff --git a/synapse/res/templates/registration.txt b/synapse/res/templates/registration.txt new file mode 100644 index 0000000000..cb4f16a90c --- /dev/null +++ b/synapse/res/templates/registration.txt @@ -0,0 +1,10 @@ +Hello there, + +You have asked us to register this email with a new Matrix account. If this +was you, please click the link below to confirm your email address: + +{{ link }} + +If this was not you, you can safely disregard this email. + +Thank you.
diff --git a/synapse/res/templates/registration_failure.html b/synapse/res/templates/registration_failure.html new file mode 100644 index 0000000000..2833d79c37 --- /dev/null +++ b/synapse/res/templates/registration_failure.html @@ -0,0 +1,6 @@
+<html>
+<head></head>
+<body>
+<p>Validation failed for the following reason: {{ failure_reason }}.</p>
+</body>
+</html>
diff --git a/synapse/res/templates/registration_success.html b/synapse/res/templates/registration_success.html new file mode 100644 index 0000000000..fbd6e4018f --- /dev/null +++ b/synapse/res/templates/registration_success.html @@ -0,0 +1,6 @@
+<html>
+<head></head>
+<body>
+<p>Your email has now been validated, please return to your client. You may now close this window.</p>
+</body>
+</html>
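These success and failure pages are rendered by the new RegistrationSubmitTokenServlet further down; the link a user clicks to reach them is built in Mailer.send_registration_mail. A sketch of the URL shape, using made-up host and token values:

```python
# Sketch of the validation link from Mailer.send_registration_mail; the
# host, token, client_secret and sid values are invented for illustration.
public_baseurl = "https://example.com/"  # assumed to carry a trailing slash

link = (
    public_baseurl
    + "_matrix/client/unstable/registration/email/submit_token"
    + "?token=%s&client_secret=%s&sid=%s" % ("abcdef123456", "somesecret", "1")
)
# A GET on this link validates the session, then either 302-redirects to
# next_link or renders registration_success.html / registration_failure.html.
print(link)
```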
+ + diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 1d20b96d03..4a1fc2ec2b 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -73,7 +73,7 @@ class ClientRestResource(JsonResource): @staticmethod def register_servlets(client_resource, hs): - versions.register_servlets(client_resource) + versions.register_servlets(hs, client_resource) # Deprecated in r0 initial_sync.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index e3d59ac3ac..8250ae0ae1 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -37,6 +37,7 @@ def client_patterns(path_regex, releases=(0,), unstable=True, v1=False): SRE_Pattern """ patterns = [] + if unstable: unstable_prefix = CLIENT_API_PREFIX + "/unstable" patterns.append(re.compile("^" + unstable_prefix + path_regex)) @@ -46,6 +47,7 @@ def client_patterns(path_regex, releases=(0,), unstable=True, v1=False): for release in releases: new_prefix = CLIENT_API_PREFIX + "/r%d" % (release,) patterns.append(re.compile("^" + new_prefix + path_regex)) + return patterns diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index e9cc953bdd..997557dfb0 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -18,12 +18,11 @@ import logging from six.moves import http_client -import jinja2 - from twisted.internet import defer from synapse.api.constants import LoginType from synapse.api.errors import Codes, SynapseError, ThreepidValidationError +from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -31,8 +30,8 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.push.mailer import Mailer, load_jinja2_templates from synapse.util.msisdn import phone_number_to_msisdn -from synapse.util.stringutils import random_string from synapse.util.threepids import check_3pid_allowed from ._base import client_patterns, interactive_auth_handler @@ -50,25 +49,28 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): self.config = hs.config self.identity_handler = hs.get_handlers().identity_handler - if self.config.email_password_reset_behaviour == "local": - from synapse.push.mailer import Mailer, load_jinja2_templates - - templates = load_jinja2_templates( - config=hs.config, - template_html_name=hs.config.email_password_reset_template_html, - template_text_name=hs.config.email_password_reset_template_text, + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + template_html, template_text = load_jinja2_templates( + self.config.email_template_dir, + [ + self.config.email_password_reset_template_html, + self.config.email_password_reset_template_text, + ], + apply_format_ts_filter=True, + apply_mxc_to_http_filter=True, + public_baseurl=self.config.public_baseurl, ) self.mailer = Mailer( hs=self.hs, app_name=self.config.email_app_name, - template_html=templates[0], - template_text=templates[1], + template_html=template_html, + template_text=template_text, ) @defer.inlineCallbacks def on_POST(self, request): - if self.config.email_password_reset_behaviour == "off": - if self.config.password_resets_were_disabled_due_to_email_config: + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: logger.warn( "User 
password resets have been disabled due to lack of email config" ) @@ -93,25 +95,39 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): Codes.THREEPID_DENIED, ) - existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( + existing_user_id = yield self.hs.get_datastore().get_user_id_by_threepid( "email", email ) - if existingUid is None: + if existing_user_id is None: raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - if self.config.email_password_reset_behaviour == "remote": - if "id_server" not in body: - raise SynapseError(400, "Missing 'id_server' param in body") + if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + # Have the configured identity server handle the request + if not self.hs.config.account_threepid_delegate_email: + logger.warn( + "No upstream email account_threepid_delegate configured on the server to " + "handle this request" + ) + raise SynapseError( + 400, "Password reset by email is not supported on this homeserver" + ) - # Have the identity server handle the password reset flow ret = yield self.identity_handler.requestEmailToken( - body["id_server"], email, client_secret, send_attempt, next_link + self.hs.config.account_threepid_delegate_email, + email, + client_secret, + send_attempt, + next_link, ) else: # Send password reset emails from Synapse - sid = yield self.send_password_reset( - email, client_secret, send_attempt, next_link + sid = yield self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_password_reset_mail, + next_link, ) # Wrap the session id in a JSON object @@ -119,74 +135,6 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): return 200, ret - @defer.inlineCallbacks - def send_password_reset(self, email, client_secret, send_attempt, next_link=None): - """Send a password reset email - - Args: - email (str): The user's email address - client_secret (str): The provided client secret - send_attempt (int): Which send attempt this is - - Returns: - The new session_id upon success - - Raises: - SynapseError is an error occurred when sending the email - """ - # Check that this email/client_secret/send_attempt combo is new or - # greater than what we've seen previously - session = yield self.datastore.get_threepid_validation_session( - "email", client_secret, address=email, validated=False - ) - - # Check to see if a session already exists and that it is not yet - # marked as validated - if session and session.get("validated_at") is None: - session_id = session["session_id"] - last_send_attempt = session["last_send_attempt"] - - # Check that the send_attempt is higher than previous attempts - if send_attempt <= last_send_attempt: - # If not, just return a success without sending an email - return session_id - else: - # An non-validated session does not exist yet. 
- # Generate a session id - session_id = random_string(16) - - # Generate a new validation token - token = random_string(32) - - # Send the mail with the link containing the token, client_secret - # and session_id - try: - yield self.mailer.send_password_reset_mail( - email, token, client_secret, session_id - ) - except Exception: - logger.exception("Error sending a password reset email to %s", email) - raise SynapseError( - 500, "An error was encountered when sending the password reset email" - ) - - token_expires = ( - self.hs.clock.time_msec() + self.config.email_validation_token_lifetime - ) - - yield self.datastore.start_or_continue_validation_session( - "email", - email, - session_id, - client_secret, - send_attempt, - next_link, - token, - token_expires, - ) - - return session_id - class MsisdnPasswordRequestTokenRestServlet(RestServlet): PATTERNS = client_patterns("/account/password/msisdn/requestToken$") @@ -202,11 +150,15 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): body = parse_json_object_from_request(request) assert_params_in_dict( - body, - ["id_server", "client_secret", "country", "phone_number", "send_attempt"], + body, ["client_secret", "country", "phone_number", "send_attempt"] ) + client_secret = body["client_secret"] + country = body["country"] + phone_number = body["phone_number"] + send_attempt = body["send_attempt"] + next_link = body.get("next_link") # Optional param - msisdn = phone_number_to_msisdn(body["country"], body["phone_number"]) + msisdn = phone_number_to_msisdn(country, phone_number) if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( @@ -215,12 +167,32 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): Codes.THREEPID_DENIED, ) - existingUid = yield self.datastore.get_user_id_by_threepid("msisdn", msisdn) + existing_user_id = yield self.datastore.get_user_id_by_threepid( + "msisdn", msisdn + ) - if existingUid is None: + if existing_user_id is None: raise SynapseError(400, "MSISDN not found", Codes.THREEPID_NOT_FOUND) - ret = yield self.identity_handler.requestMsisdnToken(**body) + if not self.hs.config.account_threepid_delegate_msisdn: + logger.warn( + "No upstream msisdn account_threepid_delegate configured on the server to " + "handle this request" + ) + raise SynapseError( + 400, + "Password reset by phone number is not supported on this homeserver", + ) + + ret = yield self.identity_handler.requestMsisdnToken( + self.hs.config.account_threepid_delegate_msisdn, + country, + phone_number, + client_secret, + send_attempt, + next_link, + ) + return 200, ret @@ -241,31 +213,32 @@ class PasswordResetSubmitTokenServlet(RestServlet): self.auth = hs.get_auth() self.config = hs.config self.clock = hs.get_clock() - self.datastore = hs.get_datastore() + self.store = hs.get_datastore() @defer.inlineCallbacks def on_GET(self, request, medium): + # We currently only handle threepid token submissions for email if medium != "email": raise SynapseError( 400, "This medium is currently not supported for password resets" ) - if self.config.email_password_reset_behaviour == "off": - if self.config.password_resets_were_disabled_due_to_email_config: + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: logger.warn( - "User password resets have been disabled due to lack of email config" + "Password reset emails have been disabled due to lack of an email config" ) raise SynapseError( - 400, "Email-based password resets have been disabled on this 
server" + 400, "Email-based password resets are disabled on this server" ) - sid = parse_string(request, "sid") - client_secret = parse_string(request, "client_secret") - token = parse_string(request, "token") + sid = parse_string(request, "sid", required=True) + client_secret = parse_string(request, "client_secret", required=True) + token = parse_string(request, "token", required=True) - # Attempt to validate a 3PID sesssion + # Attempt to validate a 3PID session try: # Mark the session as valid - next_link = yield self.datastore.validate_threepid_session( + next_link = yield self.store.validate_threepid_session( sid, client_secret, token, self.clock.time_msec() ) @@ -282,38 +255,22 @@ class PasswordResetSubmitTokenServlet(RestServlet): return None # Otherwise show the success template - html = self.config.email_password_reset_template_success_html_content + html = self.config.email_password_reset_template_success_html request.setResponseCode(200) except ThreepidValidationError as e: + request.setResponseCode(e.code) + # Show a failure page with a reason - html = self.load_jinja2_template( + html_template = load_jinja2_templates( self.config.email_template_dir, - self.config.email_password_reset_template_failure_html, - template_vars={"failure_reason": e.msg}, + [self.config.email_password_reset_template_failure_html], ) - request.setResponseCode(e.code) + + template_vars = {"failure_reason": e.msg} + html = html_template.render(**template_vars) request.write(html.encode("utf-8")) finish_request(request) - return None - - def load_jinja2_template(self, template_dir, template_filename, template_vars): - """Loads a jinja2 template with variables to insert - - Args: - template_dir (str): The directory where templates are stored - template_filename (str): The name of the template in the template_dir - template_vars (Dict): Dictionary of keys in the template - alongside their values to insert - - Returns: - str containing the contents of the rendered template - """ - loader = jinja2.FileSystemLoader(template_dir) - env = jinja2.Environment(loader=loader) - - template = env.get_template(template_filename) - return template.render(**template_vars) @defer.inlineCallbacks def on_POST(self, request, medium): @@ -325,7 +282,7 @@ class PasswordResetSubmitTokenServlet(RestServlet): body = parse_json_object_from_request(request) assert_params_in_dict(body, ["sid", "client_secret", "token"]) - valid, _ = yield self.datastore.validate_threepid_validation_token( + valid, _ = yield self.store.validate_threepid_session( body["sid"], body["client_secret"], body["token"], self.clock.time_msec() ) response_code = 200 if valid else 400 @@ -371,7 +328,6 @@ class PasswordRestServlet(RestServlet): [[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]], body, self.hs.get_ip_from_request(request), - password_servlet=True, ) if LoginType.EMAIL_IDENTITY in result: @@ -454,10 +410,11 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/email/requestToken$") def __init__(self, hs): - self.hs = hs super(EmailThreepidRequestTokenRestServlet, self).__init__() + self.hs = hs + self.config = hs.config self.identity_handler = hs.get_handlers().identity_handler - self.datastore = self.hs.get_datastore() + self.store = self.hs.get_datastore() @defer.inlineCallbacks def on_POST(self, request): @@ -465,22 +422,29 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): assert_params_in_dict( body, ["id_server", "client_secret", "email", "send_attempt"] ) + id_server = "https://" + 
body["id_server"] # Assume https + client_secret = body["client_secret"] + email = body["email"] + send_attempt = body["send_attempt"] + next_link = body.get("next_link") # Optional param - if not check_3pid_allowed(self.hs, "email", body["email"]): + if not check_3pid_allowed(self.hs, "email", email): raise SynapseError( 403, "Your email domain is not authorized on this server", Codes.THREEPID_DENIED, ) - existingUid = yield self.datastore.get_user_id_by_threepid( + existing_user_id = yield self.store.get_user_id_by_threepid( "email", body["email"] ) - if existingUid is not None: + if existing_user_id is not None: raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - ret = yield self.identity_handler.requestEmailToken(**body) + ret = yield self.identity_handler.requestEmailToken( + id_server, email, client_secret, send_attempt, next_link + ) return 200, ret @@ -490,8 +454,8 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): def __init__(self, hs): self.hs = hs super(MsisdnThreepidRequestTokenRestServlet, self).__init__() + self.store = self.hs.get_datastore() self.identity_handler = hs.get_handlers().identity_handler - self.datastore = self.hs.get_datastore() @defer.inlineCallbacks def on_POST(self, request): @@ -500,8 +464,14 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): body, ["id_server", "client_secret", "country", "phone_number", "send_attempt"], ) + id_server = "https://" + body["id_server"] # Assume https + client_secret = body["client_secret"] + country = body["country"] + phone_number = body["phone_number"] + send_attempt = body["send_attempt"] + next_link = body.get("next_link") # Optional param - msisdn = phone_number_to_msisdn(body["country"], body["phone_number"]) + msisdn = phone_number_to_msisdn(country, phone_number) if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( @@ -510,12 +480,14 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): Codes.THREEPID_DENIED, ) - existingUid = yield self.datastore.get_user_id_by_threepid("msisdn", msisdn) + existing_user_id = yield self.store.get_user_id_by_threepid("msisdn", msisdn) - if existingUid is not None: + if existing_user_id is not None: raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE) - ret = yield self.identity_handler.requestMsisdnToken(**body) + ret = yield self.identity_handler.requestMsisdnToken( + id_server, country, phone_number, client_secret, send_attempt, next_link + ) return 200, ret diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 1ccd2bed2f..7ab534581e 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -28,16 +28,20 @@ from synapse.api.errors import ( Codes, LimitExceededError, SynapseError, + ThreepidValidationError, UnrecognizedRequestError, ) +from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.ratelimiting import FederationRateLimitConfig from synapse.config.server import is_threepid_reserved +from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, assert_params_in_dict, parse_json_object_from_request, parse_string, ) +from synapse.push.mailer import load_jinja2_templates from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.threepids import check_3pid_allowed @@ -70,30 +74,92 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): 
super(EmailRegisterRequestTokenRestServlet, self).__init__() self.hs = hs self.identity_handler = hs.get_handlers().identity_handler + self.config = hs.config + + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + from synapse.push.mailer import Mailer, load_jinja2_templates + + template_html, template_text = load_jinja2_templates( + self.config.email_template_dir, + [ + self.config.email_registration_template_html, + self.config.email_registration_template_text, + ], + apply_format_ts_filter=True, + apply_mxc_to_http_filter=True, + public_baseurl=self.config.public_baseurl, + ) + self.mailer = Mailer( + hs=self.hs, + app_name=self.config.email_app_name, + template_html=template_html, + template_text=template_text, + ) @defer.inlineCallbacks def on_POST(self, request): + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.hs.config.local_threepid_handling_disabled_due_to_email_config: + logger.warn( + "Email registration has been disabled due to lack of email config" + ) + raise SynapseError( + 400, "Email-based registration has been disabled on this server" + ) body = parse_json_object_from_request(request) - assert_params_in_dict( - body, ["id_server", "client_secret", "email", "send_attempt"] - ) + assert_params_in_dict(body, ["client_secret", "email", "send_attempt"]) - if not check_3pid_allowed(self.hs, "email", body["email"]): + # Extract params from body + client_secret = body["client_secret"] + email = body["email"] + send_attempt = body["send_attempt"] + next_link = body.get("next_link") # Optional param + + if not check_3pid_allowed(self.hs, "email", email): raise SynapseError( 403, "Your email domain is not authorized to register on this server", Codes.THREEPID_DENIED, ) - existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( + existing_user_id = yield self.hs.get_datastore().get_user_id_by_threepid( "email", body["email"] ) - if existingUid is not None: + if existing_user_id is not None: raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - ret = yield self.identity_handler.requestEmailToken(**body) + if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if not self.hs.config.account_threepid_delegate_email: + logger.warn( + "No upstream email account_threepid_delegate configured on the server to " + "handle this request" + ) + raise SynapseError( + 400, "Registration by email is not supported on this homeserver" + ) + + ret = yield self.identity_handler.requestEmailToken( + self.hs.config.account_threepid_delegate_email, + email, + client_secret, + send_attempt, + next_link, + ) + else: + # Send registration emails from Synapse + sid = yield self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_registration_mail, + next_link, + ) + + # Wrap the session id in a JSON object + ret = {"sid": sid} + return 200, ret @@ -114,11 +180,15 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): body = parse_json_object_from_request(request) assert_params_in_dict( - body, - ["id_server", "client_secret", "country", "phone_number", "send_attempt"], + body, ["client_secret", "country", "phone_number", "send_attempt"] ) + client_secret = body["client_secret"] + country = body["country"] + phone_number = body["phone_number"] + send_attempt = body["send_attempt"] + next_link = body.get("next_link") # Optional param - msisdn = phone_number_to_msisdn(body["country"], body["phone_number"]) + msisdn = phone_number_to_msisdn(country, 
phone_number) if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( @@ -127,19 +197,114 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): Codes.THREEPID_DENIED, ) - existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( + existing_user_id = yield self.hs.get_datastore().get_user_id_by_threepid( "msisdn", msisdn ) - if existingUid is not None: + if existing_user_id is not None: raise SynapseError( 400, "Phone number is already in use", Codes.THREEPID_IN_USE ) - ret = yield self.identity_handler.requestMsisdnToken(**body) + if not self.hs.config.account_threepid_delegate_msisdn: + logger.warn( + "No upstream msisdn account_threepid_delegate configured on the server to " + "handle this request" + ) + raise SynapseError( + 400, "Registration by phone number is not supported on this homeserver" + ) + + ret = yield self.identity_handler.requestMsisdnToken( + self.hs.config.account_threepid_delegate_msisdn, + country, + phone_number, + client_secret, + send_attempt, + next_link, + ) + return 200, ret +class RegistrationSubmitTokenServlet(RestServlet): + """Handles registration 3PID validation token submission""" + + PATTERNS = client_patterns( + "/registration/(?P[^/]*)/submit_token$", releases=(), unstable=True + ) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(RegistrationSubmitTokenServlet, self).__init__() + self.hs = hs + self.auth = hs.get_auth() + self.config = hs.config + self.clock = hs.get_clock() + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, medium): + if medium != "email": + raise SynapseError( + 400, "This medium is currently not supported for registration" + ) + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: + logger.warn( + "User registration via email has been disabled due to lack of email config" + ) + raise SynapseError( + 400, "Email-based registration is disabled on this server" + ) + + sid = parse_string(request, "sid", required=True) + client_secret = parse_string(request, "client_secret", required=True) + token = parse_string(request, "token", required=True) + + # Attempt to validate a 3PID session + try: + # Mark the session as valid + next_link = yield self.store.validate_threepid_session( + sid, client_secret, token, self.clock.time_msec() + ) + + # Perform a 302 redirect if next_link is set + if next_link: + if next_link.startswith("file:///"): + logger.warn( + "Not redirecting to next_link as it is a local file: address" + ) + else: + request.setResponseCode(302) + request.setHeader("Location", next_link) + finish_request(request) + return None + + # Otherwise show the success template + html = self.config.email_registration_template_success_html_content + + request.setResponseCode(200) + except ThreepidValidationError as e: + # Show a failure page with a reason + request.setResponseCode(e.code) + + # Show a failure page with a reason + html_template = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_registration_template_failure_html], + ) + + template_vars = {"failure_reason": e.msg} + html = html_template.render(**template_vars) + + request.write(html.encode("utf-8")) + finish_request(request) + + class UsernameAvailabilityRestServlet(RestServlet): PATTERNS = client_patterns("/register/available") @@ -438,11 +603,11 @@ class RegisterRestServlet(RestServlet): medium = auth_result[login_type]["medium"] address = 
auth_result[login_type]["address"] - existingUid = yield self.store.get_user_id_by_threepid( + existing_user_id = yield self.store.get_user_id_by_threepid( medium, address ) - if existingUid is not None: + if existing_user_id is not None: raise SynapseError( 400, "%s is already in use" % medium, @@ -550,4 +715,5 @@ def register_servlets(hs, http_server): EmailRegisterRequestTokenRestServlet(hs).register(http_server) MsisdnRegisterRequestTokenRestServlet(hs).register(http_server) UsernameAvailabilityRestServlet(hs).register(http_server) + RegistrationSubmitTokenServlet(hs).register(http_server) RegisterRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 0e09191632..0058b6b459 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -24,6 +24,10 @@ logger = logging.getLogger(__name__) class VersionsRestServlet(RestServlet): PATTERNS = [re.compile("^/_matrix/client/versions$")] + def __init__(self, hs): + super(VersionsRestServlet, self).__init__() + self.config = hs.config + def on_GET(self, request): return ( 200, @@ -49,5 +53,5 @@ class VersionsRestServlet(RestServlet): ) -def register_servlets(http_server): - VersionsRestServlet().register(http_server) +def register_servlets(hs, http_server): + VersionsRestServlet(hs).register(http_server) -- cgit 1.4.1 From 1ab1479a92d940fc17afd4a5d991ce5a7d210a85 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 6 Sep 2019 13:02:21 +0100 Subject: Add changelog --- changelog.d/5993.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5993.bugfix diff --git a/changelog.d/5993.bugfix b/changelog.d/5993.bugfix new file mode 100644 index 0000000000..d7fb687c6e --- /dev/null +++ b/changelog.d/5993.bugfix @@ -0,0 +1 @@ +Fix a bug where registration via threepid was broken due to the store method being in the wrong place. \ No newline at end of file -- cgit 1.4.1 From e059c5e6487f0e86e395f1e79d400ae0b0d24a82 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 6 Sep 2019 13:10:11 +0100 Subject: Move get_threepid_validation_session into RegistrationWorkerStore --- synapse/storage/registration.py | 108 ++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 2d3c7e2dc9..24509bd455 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -614,6 +614,60 @@ class RegistrationWorkerStore(SQLBaseStore): # Convert the integer into a boolean. return res == 1 + def get_threepid_validation_session( + self, medium, client_secret, address=None, sid=None, validated=True + ): + """Gets a session_id and last_send_attempt (if available) for a + client_secret/medium/(address|session_id) combo + + Args: + medium (str|None): The medium of the 3PID + address (str|None): The address of the 3PID + sid (str|None): The ID of the validation session + client_secret (str|None): A unique string provided by the client to + help identify this validation attempt + validated (bool|None): Whether sessions should be filtered by + whether they have been validated already or not. None to + perform no filtering + + Returns: + deferred {str, int}|None: A dict containing the + latest session_id and send_attempt count for this 3PID. 
+ Otherwise None if there hasn't been a previous attempt + """ + keyvalues = {"medium": medium, "client_secret": client_secret} + if address: + keyvalues["address"] = address + if sid: + keyvalues["session_id"] = sid + + assert address or sid + + def get_threepid_validation_session_txn(txn): + sql = """ + SELECT address, session_id, medium, client_secret, + last_send_attempt, validated_at + FROM threepid_validation_session WHERE %s + """ % ( + " AND ".join("%s = ?" % k for k in iterkeys(keyvalues)), + ) + + if validated is not None: + sql += " AND validated_at IS " + ("NOT NULL" if validated else "NULL") + + sql += " LIMIT 1" + + txn.execute(sql, list(keyvalues.values())) + rows = self.cursor_to_dict(txn) + if not rows: + return None + + return rows[0] + + return self.runInteraction( + "get_threepid_validation_session", get_threepid_validation_session_txn + ) + class RegistrationStore( RegistrationWorkerStore, background_updates.BackgroundUpdateStore @@ -1082,60 +1136,6 @@ class RegistrationStore( return 1 - def get_threepid_validation_session( - self, medium, client_secret, address=None, sid=None, validated=True - ): - """Gets a session_id and last_send_attempt (if available) for a - client_secret/medium/(address|session_id) combo - - Args: - medium (str|None): The medium of the 3PID - address (str|None): The address of the 3PID - sid (str|None): The ID of the validation session - client_secret (str|None): A unique string provided by the client to - help identify this validation attempt - validated (bool|None): Whether sessions should be filtered by - whether they have been validated already or not. None to - perform no filtering - - Returns: - deferred {str, int}|None: A dict containing the - latest session_id and send_attempt count for this 3PID. - Otherwise None if there hasn't been a previous attempt - """ - keyvalues = {"medium": medium, "client_secret": client_secret} - if address: - keyvalues["address"] = address - if sid: - keyvalues["session_id"] = sid - - assert address or sid - - def get_threepid_validation_session_txn(txn): - sql = """ - SELECT address, session_id, medium, client_secret, - last_send_attempt, validated_at - FROM threepid_validation_session WHERE %s - """ % ( - " AND ".join("%s = ?" % k for k in iterkeys(keyvalues)), - ) - - if validated is not None: - sql += " AND validated_at IS " + ("NOT NULL" if validated else "NULL") - - sql += " LIMIT 1" - - txn.execute(sql, list(keyvalues.values())) - rows = self.cursor_to_dict(txn) - if not rows: - return None - - return rows[0] - - return self.runInteraction( - "get_threepid_validation_session", get_threepid_validation_session_txn - ) - def validate_threepid_session(self, session_id, client_secret, token, current_ts): """Attempt to validate a threepid session using a token -- cgit 1.4.1 From 5a7e9fdd846a5c231ab1b1ed9bd12fef7ff5d408 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 6 Sep 2019 13:18:03 +0100 Subject: Change changelog --- changelog.d/5993.bugfix | 1 - changelog.d/5993.feature | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 changelog.d/5993.bugfix create mode 100644 changelog.d/5993.feature diff --git a/changelog.d/5993.bugfix b/changelog.d/5993.bugfix deleted file mode 100644 index d7fb687c6e..0000000000 --- a/changelog.d/5993.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug where registration via threepid was broken due to the store method being in the wrong place. 
\ No newline at end of file diff --git a/changelog.d/5993.feature b/changelog.d/5993.feature new file mode 100644 index 0000000000..3e8bf5068d --- /dev/null +++ b/changelog.d/5993.feature @@ -0,0 +1 @@ +Add the ability to send registration emails from the homeserver rather than delegating to an identity server. -- cgit 1.4.1 From 6ddda8152ed4f8111d8108f6f2f92f365b9069ba Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 6 Sep 2019 13:23:10 +0100 Subject: Move delete_threepid_session into RegistrationWorkerStore --- synapse/storage/registration.py | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 24509bd455..5138792a5f 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -668,6 +668,31 @@ class RegistrationWorkerStore(SQLBaseStore): "get_threepid_validation_session", get_threepid_validation_session_txn ) + def delete_threepid_session(self, session_id): + """Removes a threepid validation session from the database. This can + be done after validation has been performed and whatever action was + waiting on it has been carried out + + Args: + session_id (str): The ID of the session to delete + """ + + def delete_threepid_session_txn(txn): + self._simple_delete_txn( + txn, + table="threepid_validation_token", + keyvalues={"session_id": session_id}, + ) + self._simple_delete_txn( + txn, + table="threepid_validation_session", + keyvalues={"session_id": session_id}, + ) + + return self.runInteraction( + "delete_threepid_session", delete_threepid_session_txn + ) + class RegistrationStore( RegistrationWorkerStore, background_updates.BackgroundUpdateStore @@ -1323,31 +1348,6 @@ class RegistrationStore( self.clock.time_msec(), ) - def delete_threepid_session(self, session_id): - """Removes a threepid validation session from the database. 
This can - be done after validation has been performed and whatever action was - waiting on it has been carried out - - Args: - session_id (str): The ID of the session to delete - """ - - def delete_threepid_session_txn(txn): - self._simple_delete_txn( - txn, - table="threepid_validation_token", - keyvalues={"session_id": session_id}, - ) - self._simple_delete_txn( - txn, - table="threepid_validation_session", - keyvalues={"session_id": session_id}, - ) - - return self.runInteraction( - "delete_threepid_session", delete_threepid_session_txn - ) - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): self._simple_update_one_txn( txn=txn, -- cgit 1.4.1 From ca74b140f264e8b5429e352ac054288e1f78b980 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 6 Sep 2019 13:25:06 +0100 Subject: Fix destructuring assumption bug --- synapse/rest/client/v2_alpha/account.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 997557dfb0..5babed7da8 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -261,7 +261,7 @@ class PasswordResetSubmitTokenServlet(RestServlet): request.setResponseCode(e.code) # Show a failure page with a reason - html_template = load_jinja2_templates( + [html_template] = load_jinja2_templates( self.config.email_template_dir, [self.config.email_password_reset_template_failure_html], ) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 7ab534581e..27f4addc41 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -293,7 +293,7 @@ class RegistrationSubmitTokenServlet(RestServlet): request.setResponseCode(e.code) # Show a failure page with a reason - html_template = load_jinja2_templates( + [html_template] = load_jinja2_templates( self.config.email_template_dir, [self.config.email_registration_template_failure_html], ) -- cgit 1.4.1 From 5d833f09236b0202197ee70824d25c7bfd31c161 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 6 Sep 2019 13:27:55 +0100 Subject: Add changelog --- changelog.d/5994.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5994.feature diff --git a/changelog.d/5994.feature b/changelog.d/5994.feature new file mode 100644 index 0000000000..5b69b97fe7 --- /dev/null +++ b/changelog.d/5994.feature @@ -0,0 +1 @@ +Add the ability to send registration emails from the homeserver rather than delegating to an identity server. 
\ No newline at end of file -- cgit 1.4.1 From cf5a420c8a5cbcb6ffbbdbc562e07cfde8d09ab4 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:34:42 +0100 Subject: Apply suggestions from code review Co-Authored-By: Erik Johnston --- synapse/rest/client/v2_alpha/account.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 5babed7da8..785d01ea52 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -261,7 +261,7 @@ class PasswordResetSubmitTokenServlet(RestServlet): request.setResponseCode(e.code) # Show a failure page with a reason - [html_template] = load_jinja2_templates( + html_template, = load_jinja2_templates( self.config.email_template_dir, [self.config.email_password_reset_template_failure_html], ) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 27f4addc41..5c7a5f3579 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -293,7 +293,7 @@ class RegistrationSubmitTokenServlet(RestServlet): request.setResponseCode(e.code) # Show a failure page with a reason - [html_template] = load_jinja2_templates( + html_template, = load_jinja2_templates( self.config.email_template_dir, [self.config.email_registration_template_failure_html], ) -- cgit 1.4.1 From 142c9325c27432f9e40c345166def1dd20355433 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Sep 2019 14:09:16 +0100 Subject: Correctly handle non-bool m.federate flag --- synapse/handlers/stats.py | 4 +++- synapse/storage/stats.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 921735edb3..3c265f3718 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -260,7 +260,9 @@ class StatsHandler(StateDeltasHandler): room_stats_delta["local_users_in_room"] += delta elif typ == EventTypes.Create: - room_state["is_federatable"] = event_content.get("m.federate", True) + room_state["is_federatable"] = ( + event_content.get("m.federate", True) is True + ) if sender and self.is_mine_id(sender): user_to_stats_deltas.setdefault(sender, Counter())[ "rooms_created" diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 6560173c08..09190d684e 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -823,7 +823,9 @@ class StatsStore(StateDeltasStore): elif event.type == EventTypes.CanonicalAlias: room_state["canonical_alias"] = event.content.get("alias") elif event.type == EventTypes.Create: - room_state["is_federatable"] = event.content.get("m.federate", True) + room_state["is_federatable"] = ( + event.content.get("m.federate", True) is True + ) yield self.update_room_state(room_id, room_state) -- cgit 1.4.1 From 85275c89d79c7780f9a49ff206e906701308d55c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Sep 2019 14:21:14 +0100 Subject: Newsfile --- changelog.d/5998.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5998.bugfix diff --git a/changelog.d/5998.bugfix b/changelog.d/5998.bugfix new file mode 100644 index 0000000000..9ea095103b --- /dev/null +++ b/changelog.d/5998.bugfix @@ -0,0 +1 @@ +Fix room and user stats tracking. 
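A note on the `m.federate` comparison in the patch above: a create event's content is arbitrary JSON, so the flag may arrive as a string, number, or object rather than a boolean. Comparing with `is True` keeps `is_federatable` a real boolean before it reaches the stats tables. A minimal standalone sketch of that coercion (illustrative Python, not Synapse code):

    # Only a literal JSON `true` counts as federatable; any other value
    # (strings, numbers, objects) is treated as False, and a missing key
    # falls back to the spec default of True.
    def is_federatable(event_content: dict) -> bool:
        return event_content.get("m.federate", True) is True

    assert is_federatable({}) is True
    assert is_federatable({"m.federate": True}) is True
    assert is_federatable({"m.federate": "true"}) is False  # non-bool coerced to False
    assert is_federatable({"m.federate": 1}) is False       # `is True` checks identity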
-- cgit 1.4.1 From 78801e7f9e355660d2c80f0923ff7c4c19f0f004 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 6 Sep 2019 15:36:50 +0100 Subject: Ensure a sid parameter is passed to bind_threepid (#5995) `sid` is required to be part of `three_pid_creds`. We were 500'ing if it wasn't provided instead of returning `M_MISSING_PARAM`. --- changelog.d/5995.bugfix | 1 + synapse/handlers/identity.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5995.bugfix diff --git a/changelog.d/5995.bugfix b/changelog.d/5995.bugfix new file mode 100644 index 0000000000..e03ab98bc6 --- /dev/null +++ b/changelog.d/5995.bugfix @@ -0,0 +1 @@ +Return a M_MISSING_PARAM if `sid` is not provided to `/account/3pid`. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 71b5a87392..45db1c1c06 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -151,12 +151,18 @@ class IdentityHandler(BaseHandler): creds ) + sid = creds.get("sid") + if not sid: + raise SynapseError( + 400, "No sid in three_pid_creds", errcode=Codes.MISSING_PARAM + ) + # If an id_access_token is not supplied, force usage of v1 if id_access_token is None: use_v2 = False # Decide which API endpoint URLs to use - bind_data = {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid} + bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) bind_data["id_access_token"] = id_access_token -- cgit 1.4.1 From 55d5b3af8863167432017f23cd8a04a0c14c9d23 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 7 Sep 2019 01:45:51 +1000 Subject: Servers-known-about statistic (#5981) --- changelog.d/5981.feature | 1 + docs/sample_config.yaml | 10 ++++ synapse/config/metrics.py | 31 ++++++++++ synapse/storage/roommember.py | 59 ++++++++++++++++++ tests/config/test_generate.py | 25 ++++---- tests/config/test_load.py | 34 +++++++---- tests/storage/test_roommember.py | 126 +++++++++++++++++++++++++++------------ 7 files changed, 226 insertions(+), 60 deletions(-) create mode 100644 changelog.d/5981.feature diff --git a/changelog.d/5981.feature b/changelog.d/5981.feature new file mode 100644 index 0000000000..e39514273d --- /dev/null +++ b/changelog.d/5981.feature @@ -0,0 +1 @@ +Setting metrics_flags.known_servers to True in the configuration will publish the synapse_federation_known_servers metric over Prometheus. This represents the total number of servers your server knows about (i.e. is in rooms with), including itself. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 186cdbedd2..93c0edd8ce 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -958,6 +958,16 @@ account_threepid_delegates: #sentry: # dsn: "..." +# Flags to enable Prometheus metrics which are not suitable to be +# enabled by default, either for performance reasons or limited use. + # +metrics_flags: + # Publish synapse_federation_known_servers, a gauge of the number of + # servers this homeserver knows about, including itself. May cause + # performance problems on large homeservers. + # + #known_servers: true + # Whether or not to report anonymized homeserver usage statistics.
# report_stats: true|false diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 3698441963..653b990e67 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import attr + from ._base import Config, ConfigError MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry @@ -20,6 +23,18 @@ MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentr """ +@attr.s +class MetricsFlags(object): + known_servers = attr.ib(default=False, validator=attr.validators.instance_of(bool)) + + @classmethod + def all_off(cls): + """ + Instantiate the flags with all options set to off. + """ + return cls(**{x.name: False for x in attr.fields(cls)}) + + class MetricsConfig(Config): def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) @@ -27,6 +42,12 @@ class MetricsConfig(Config): self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") + if self.enable_metrics: + _metrics_config = config.get("metrics_flags") or {} + self.metrics_flags = MetricsFlags(**_metrics_config) + else: + self.metrics_flags = MetricsFlags.all_off() + self.sentry_enabled = "sentry" in config if self.sentry_enabled: try: @@ -58,6 +79,16 @@ class MetricsConfig(Config): #sentry: # dsn: "..." + # Flags to enable Prometheus metrics which are not suitable to be + # enabled by default, either for performance reasons or limited use. + # + metrics_flags: + # Publish synapse_federation_known_servers, a gauge of the number of + # servers this homeserver knows about, including itself. May cause + # performance problems on large homeservers. + # + #known_servers: true + # Whether or not to report anonymized homeserver usage statistics.
""" diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index f8b682ebd9..4df8ebdacd 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -24,8 +24,10 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import EventTypes, Membership +from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import LoggingTransaction +from synapse.storage.engines import Sqlite3Engine from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer @@ -74,6 +76,63 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._check_safe_current_state_events_membership_updated_txn(txn) txn.close() + if self.hs.config.metrics_flags.known_servers: + self._known_servers_count = 1 + self.hs.get_clock().looping_call( + run_as_background_process, + 60 * 1000, + "_count_known_servers", + self._count_known_servers, + ) + self.hs.get_clock().call_later( + 1000, + run_as_background_process, + "_count_known_servers", + self._count_known_servers, + ) + LaterGauge( + "synapse_federation_known_servers", + "", + [], + lambda: self._known_servers_count, + ) + + @defer.inlineCallbacks + def _count_known_servers(self): + """ + Count the servers that this server knows about. + + The statistic is stored on the class for the + `synapse_federation_known_servers` LaterGauge to collect. + """ + + def _transact(txn): + if isinstance(self.database_engine, Sqlite3Engine): + query = """ + SELECT COUNT(DISTINCT substr(out.user_id, pos+1)) + FROM ( + SELECT rm.user_id as user_id, instr(rm.user_id, ':') + AS pos FROM room_memberships as rm + INNER JOIN current_state_events as c ON rm.event_id = c.event_id + WHERE c.type = 'm.room.member' + ) as out + """ + else: + query = """ + SELECT COUNT(DISTINCT split_part(state_key, ':', 2)) + FROM current_state_events + WHERE type = 'm.room.member' AND membership = 'join'; + """ + txn.execute(query) + return list(txn)[0][0] + + count = yield self.runInteraction("get_known_servers", _transact) + + # We always know about ourselves, even if we have nothing in + # room_memberships (for example, the server is new). 
+ self._known_servers_count = max([count, 1]) + return self._known_servers_count + def _check_safe_current_state_events_membership_updated_txn(self, txn): """Checks if it is safe to assume the new current_state_events membership column is up to date diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py index 5017cbce85..2684e662de 100644 --- a/tests/config/test_generate.py +++ b/tests/config/test_generate.py @@ -17,6 +17,8 @@ import os.path import re import shutil import tempfile +from contextlib import redirect_stdout +from io import StringIO from synapse.config.homeserver import HomeServerConfig @@ -32,17 +34,18 @@ class ConfigGenerationTestCase(unittest.TestCase): shutil.rmtree(self.dir) def test_generate_config_generates_files(self): - HomeServerConfig.load_or_generate_config( - "", - [ - "--generate-config", - "-c", - self.file, - "--report-stats=yes", - "-H", - "lemurs.win", - ], - ) + with redirect_stdout(StringIO()): + HomeServerConfig.load_or_generate_config( + "", + [ + "--generate-config", + "-c", + self.file, + "--report-stats=yes", + "-H", + "lemurs.win", + ], + ) self.assertSetEqual( set(["homeserver.yaml", "lemurs.win.log.config", "lemurs.win.signing.key"]), diff --git a/tests/config/test_load.py b/tests/config/test_load.py index 6bfc1970ad..b3e557bd6a 100644 --- a/tests/config/test_load.py +++ b/tests/config/test_load.py @@ -15,6 +15,8 @@ import os.path import shutil import tempfile +from contextlib import redirect_stdout +from io import StringIO import yaml @@ -26,7 +28,6 @@ from tests import unittest class ConfigLoadingTestCase(unittest.TestCase): def setUp(self): self.dir = tempfile.mkdtemp() - print(self.dir) self.file = os.path.join(self.dir, "homeserver.yaml") def tearDown(self): @@ -94,18 +95,27 @@ class ConfigLoadingTestCase(unittest.TestCase): ) self.assertTrue(config.enable_registration) + def test_stats_enabled(self): + self.generate_config_and_remove_lines_containing("enable_metrics") + self.add_lines_to_config(["enable_metrics: true"]) + + # The default Metrics Flags are off by default. + config = HomeServerConfig.load_config("", ["-c", self.file]) + self.assertFalse(config.metrics_flags.known_servers) + def generate_config(self): - HomeServerConfig.load_or_generate_config( - "", - [ - "--generate-config", - "-c", - self.file, - "--report-stats=yes", - "-H", - "lemurs.win", - ], - ) + with redirect_stdout(StringIO()): + HomeServerConfig.load_or_generate_config( + "", + [ + "--generate-config", + "-c", + self.file, + "--report-stats=yes", + "-H", + "lemurs.win", + ], + ) def generate_config_and_remove_lines_containing(self, needle): self.generate_config() diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 64cb294c37..447a3c6ffb 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,78 +14,129 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- -from mock import Mock - -from twisted.internet import defer +from unittest.mock import Mock from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions -from synapse.types import Requester, RoomID, UserID +from synapse.rest.admin import register_servlets_for_client_rest_resource +from synapse.rest.client.v1 import login, room +from synapse.types import Requester, UserID from tests import unittest -from tests.utils import create_room, setup_test_homeserver -class RoomMemberStoreTestCase(unittest.TestCase): - @defer.inlineCallbacks - def setUp(self): - hs = yield setup_test_homeserver( - self.addCleanup, resource_for_federation=Mock(), http_client=None +class RoomMemberStoreTestCase(unittest.HomeserverTestCase): + + servlets = [ + login.register_servlets, + register_servlets_for_client_rest_resource, + room.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver( + resource_for_federation=Mock(), http_client=None ) + return hs + + def prepare(self, reactor, clock, hs): + # We can't test the RoomMemberStore on its own without the other event # storage logic self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() self.event_creation_handler = hs.get_event_creation_handler() - self.u_alice = UserID.from_string("@alice:test") - self.u_bob = UserID.from_string("@bob:test") + self.u_alice = self.register_user("alice", "pass") + self.t_alice = self.login("alice", "pass") + self.u_bob = self.register_user("bob", "pass") # User elsewhere on another host self.u_charlie = UserID.from_string("@charlie:elsewhere") - self.room = RoomID.from_string("!abc123:test") - - yield create_room(hs, self.room.to_string(), self.u_alice.to_string()) - - @defer.inlineCallbacks def inject_room_member(self, room, user, membership, replaces_state=None): builder = self.event_builder_factory.for_room_version( RoomVersions.V1, { "type": EventTypes.Member, - "sender": user.to_string(), - "state_key": user.to_string(), - "room_id": room.to_string(), + "sender": user, + "state_key": user, + "room_id": room, "content": {"membership": membership}, }, ) - event, context = yield self.event_creation_handler.create_new_client_event( - builder + event, context = self.get_success( + self.event_creation_handler.create_new_client_event(builder) ) - yield self.store.persist_event(event, context) + self.get_success(self.store.persist_event(event, context)) return event - @defer.inlineCallbacks def test_one_member(self): - yield self.inject_room_member(self.room, self.u_alice, Membership.JOIN) - - self.assertEquals( - [self.room.to_string()], - [ - m.room_id - for m in ( - yield self.store.get_rooms_for_user_where_membership_is( - self.u_alice.to_string(), [Membership.JOIN] - ) - ) - ], + + # Alice creates the room, and is automatically joined + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + + rooms_for_user = self.get_success( + self.store.get_rooms_for_user_where_membership_is( + self.u_alice, [Membership.JOIN] + ) ) + self.assertEquals([self.room], [m.room_id for m in rooms_for_user]) + + def test_count_known_servers(self): + """ + _count_known_servers will calculate how many servers are in a room. 
+ """ + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.inject_room_member(self.room, self.u_bob, Membership.JOIN) + self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN) + + servers = self.get_success(self.store._count_known_servers()) + self.assertEqual(servers, 2) + + def test_count_known_servers_stat_counter_disabled(self): + """ + If enabled, the metrics for how many servers are known will be counted. + """ + self.assertTrue("_known_servers_count" not in self.store.__dict__.keys()) + + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.inject_room_member(self.room, self.u_bob, Membership.JOIN) + self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN) + + self.pump(20) + + self.assertTrue("_known_servers_count" not in self.store.__dict__.keys()) + + @unittest.override_config( + {"enable_metrics": True, "metrics_flags": {"known_servers": True}} + ) + def test_count_known_servers_stat_counter_enabled(self): + """ + If enabled, the metrics for how many servers are known will be counted. + """ + # Initialises to 1 -- itself + self.assertEqual(self.store._known_servers_count, 1) + + self.pump(20) + + # No rooms have been joined, so technically the SQL returns 0, but it + # will still say it knows about itself. + self.assertEqual(self.store._known_servers_count, 1) + + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.inject_room_member(self.room, self.u_bob, Membership.JOIN) + self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN) + + self.pump(20) + + # It now knows about Charlie's server. + self.assertEqual(self.store._known_servers_count, 2) + class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, homeserver): -- cgit 1.4.1 From 561cbba0577b63f340050362144bef8527c1fc0e Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Fri, 6 Sep 2019 16:44:24 -0400 Subject: split out signature processing into separate functions --- synapse/handlers/e2e_keys.py | 399 ++++++++++++++++++++++--------------------- 1 file changed, 204 insertions(+), 195 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 2c21cb9828..6500bf3e16 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -624,234 +624,243 @@ class E2eKeysHandler(object): # signatures to be stored. Each item will be a tuple of # (signing_key_id, target_user_id, target_device_id, signature) signature_list = [] - # what devices have been updated, for notifying - self_device_ids = [] # split between checking signatures for own user and signatures for # other users, since we verify them with different keys self_signatures = signatures.get(user_id, {}) other_signatures = {k: v for k, v in signatures.items() if k != user_id} - if self_signatures: - self_device_ids = list(self_signatures.keys()) - try: - # get our self-signing key to verify the signatures - self_signing_key, self_signing_key_id, self_signing_verify_key = yield self._get_e2e_cross_signing_verify_key( - user_id, "self_signing" - ) - # get our master key, since it may be signed - master_key, master_key_id, master_verify_key = yield self._get_e2e_cross_signing_verify_key( - user_id, "master" - ) + self_signature_list, self_failures = yield self._process_self_signatures( + user_id, self_signatures + ) + signature_list.extend(self_signature_list) + failures.update(self_failures) - # fetch our stored devices. 
This is used to 1. verify - # signatures on the master key, and 2. to can compare with what - # was sent if the device was signed - devices = yield self.store.get_e2e_device_keys([(user_id, None)]) + other_signature_list, other_failures = yield self._process_other_signatures( + user_id, other_signatures + ) + signature_list.extend(other_signature_list) + failures.update(other_failures) - if user_id not in devices: - raise SynapseError(404, "No device keys found", Codes.NOT_FOUND) + # store the signature, and send the appropriate notifications for sync + logger.debug("upload signature failures: %r", failures) + yield self.store.store_e2e_cross_signing_signatures(user_id, signature_list) - devices = devices[user_id] - for device_id, device in self_signatures.items(): - try: + self_device_ids = [device_id for (_, _, device_id, _) in self_signature_list] + if self_device_ids: + yield self.device_handler.notify_device_update(user_id, self_device_ids) + signed_users = [user_id for (_, user_id, _, _) in other_signature_list] + if signed_users: + yield self.device_handler.notify_user_signature_update( + user_id, signed_users + ) + + return {"failures": failures} + + @defer.inlineCallbacks + def _process_self_signatures(self, user_id, signatures): + signature_list = [] + failures = {} + if not signatures: + return signature_list, failures + + try: + # get our self-signing key to verify the signatures + self_signing_key, self_signing_key_id, self_signing_verify_key = yield self._get_e2e_cross_signing_verify_key( + user_id, "self_signing" + ) + + # get our master key, since it may be signed + master_key, master_key_id, master_verify_key = yield self._get_e2e_cross_signing_verify_key( + user_id, "master" + ) + + # fetch our stored devices. This is used to 1. verify + # signatures on the master key, and 2. to can compare with what + # was sent if the device was signed + devices = yield self.store.get_e2e_device_keys([(user_id, None)]) + + if user_id not in devices: + raise SynapseError(404, "No device keys found", Codes.NOT_FOUND) + + devices = devices[user_id] + except SynapseError as e: + failures[user_id] = { + device: _exception_to_failure(e) + for device in signatures.keys() + } + return signature_list, failures + + for device_id, device in signatures.items(): + try: + if ( + "signatures" not in device + or user_id not in device["signatures"] + ): + # no signature was sent + raise SynapseError( + 400, "Invalid signature", Codes.INVALID_SIGNATURE + ) + + if device_id == master_verify_key.version: + # we have master key signed by devices: for each + # device that signed, check the signature. Since + # the "failures" property in the response only has + # granularity up to the signed device, either all + # of the signatures on the master key succeed, or + # all fail. So loop over the signatures and add + # them to a separate signature list. If everything + # works out, then add them all to the main + # signature list. (In practice, we're likely to + # only have only one signature anyways.) 
+ master_key_signature_list = [] + sigs = device["signatures"] + for signing_key_id, signature in sigs[user_id].items(): + alg, signing_device_id = signing_key_id.split(":", 1) if ( - "signatures" not in device - or user_id not in device["signatures"] + signing_device_id not in devices + or signing_key_id + not in devices[signing_device_id]["keys"]["keys"] ): - # no signature was sent + # signed by an unknown device, or the + # device does not have the key raise SynapseError( - 400, "Invalid signature", Codes.INVALID_SIGNATURE + 400, + "Invalid signature", + Codes.INVALID_SIGNATURE, ) - if device_id == master_verify_key.version: - # we have master key signed by devices: for each - # device that signed, check the signature. Since - # the "failures" property in the response only has - # granularity up to the signed device, either all - # of the signatures on the master key succeed, or - # all fail. So loop over the signatures and add - # them to a separate signature list. If everything - # works out, then add them all to the main - # signature list. (In practice, we're likely to - # only have only one signature anyways.) - master_key_signature_list = [] - for signing_key_id, signature in device["signatures"][ - user_id - ].items(): - alg, signing_device_id = signing_key_id.split(":", 1) - if ( - signing_device_id not in devices - or signing_key_id - not in devices[signing_device_id]["keys"]["keys"] - ): - # signed by an unknown device, or the - # device does not have the key - raise SynapseError( - 400, - "Invalid signature", - Codes.INVALID_SIGNATURE, - ) - - sigs = device["signatures"] - del device["signatures"] - # use pop to avoid exception if key doesn't exist - device.pop("unsigned", None) - master_key.pop("signature", None) - master_key.pop("unsigned", None) - - if master_key != device: - raise SynapseError(400, "Key does not match") - - # get the key and check the signature - pubkey = devices[signing_device_id]["keys"]["keys"][ - signing_key_id - ] - verify_key = decode_verify_key_bytes( - signing_key_id, decode_base64(pubkey) - ) - device["signatures"] = sigs - try: - verify_signed_json(device, user_id, verify_key) - except SignatureVerifyException: - raise SynapseError( - 400, - "Invalid signature", - Codes.INVALID_SIGNATURE, - ) - - master_key_signature_list.append( - (signing_key_id, user_id, device_id, signature) - ) - - signature_list.extend(master_key_signature_list) - continue - - # at this point, we have a device that should be signed - # by the self-signing key - if self_signing_key_id not in device["signatures"][user_id]: - # no signature was sent - raise SynapseError( - 400, "Invalid signature", Codes.INVALID_SIGNATURE - ) - - stored_device = None - try: - stored_device = devices[device_id]["keys"] - except KeyError: - raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) - if self_signing_key_id in stored_device.get( - "signatures", {} - ).get(user_id, {}): - # we already have a signature on this device, so we - # can skip it, since it should be exactly the same - continue - - _check_device_signature( - user_id, self_signing_verify_key, device, stored_device + # get the key and check the signature + pubkey = devices[signing_device_id]["keys"]["keys"][ + signing_key_id + ] + verify_key = decode_verify_key_bytes( + signing_key_id, decode_base64(pubkey) ) + _check_device_signature(user_id, verify_key, device, master_key) + device["signatures"] = sigs - signature = device["signatures"][user_id][self_signing_key_id] - signature_list.append( - (self_signing_key_id, 
user_id, device_id, signature) + master_key_signature_list.append( + (signing_key_id, user_id, device_id, signature) ) - except SynapseError as e: - failures.setdefault(user_id, {})[ - device_id - ] = _exception_to_failure(e) + + signature_list.extend(master_key_signature_list) + continue + + # at this point, we have a device that should be signed + # by the self-signing key + if self_signing_key_id not in device["signatures"][user_id]: + # no signature was sent + raise SynapseError( + 400, "Invalid signature", Codes.INVALID_SIGNATURE + ) + + stored_device = None + try: + stored_device = devices[device_id]["keys"] + except KeyError: + raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) + if self_signing_key_id in stored_device.get( + "signatures", {} + ).get(user_id, {}): + # we already have a signature on this device, so we + # can skip it, since it should be exactly the same + continue + + _check_device_signature( + user_id, self_signing_verify_key, device, stored_device + ) + + signature = device["signatures"][user_id][self_signing_key_id] + signature_list.append( + (self_signing_key_id, user_id, device_id, signature) + ) except SynapseError as e: - failures[user_id] = { - device: _exception_to_failure(e) - for device in self_signatures.keys() - } + failures.setdefault(user_id, {})[ + device_id + ] = _exception_to_failure(e) + + return signature_list, failures + + @defer.inlineCallbacks + def _process_other_signatures(self, user_id, signatures): + # now check non-self signatures. These signatures will be signed + # by the user-signing key + signature_list = [] + failures = {} + if not signatures: + return signature_list, failures - signed_users = [] # what user have been signed, for notifying - if other_signatures: - # now check non-self signatures. 
These signatures will be signed - # by the user-signing key + try: + # get our user-signing key to verify the signatures + user_signing_key, user_signing_key_id, user_signing_verify_key = yield self._get_e2e_cross_signing_verify_key( + user_id, "user_signing" + ) + except SynapseError as e: + failure = _exception_to_failure(e) + for user, devicemap in signatures.items(): + failures[user] = { + device_id: failure for device_id in devicemap.keys() + } + return signature_list, failures + for user, devicemap in signatures.items(): + device_id = None try: - # get our user-signing key to verify the signatures - user_signing_key, user_signing_key_id, user_signing_verify_key = yield self._get_e2e_cross_signing_verify_key( - user_id, "user_signing" + # get the user's master key, to make sure it matches + # what was sent + stored_key, stored_key_id, _ = yield self._get_e2e_cross_signing_verify_key( + user, "master", user_id ) - for user, devicemap in other_signatures.items(): + # make sure that the user's master key is the one that + # was signed (and no others) + device_id = stored_key_id.split(":", 1)[1] + if device_id not in devicemap: + logger.error( + "upload signature: could not find signature for device %s", + device_id, + ) + # set device to None so that the failure gets + # marked on all the signatures device_id = None - try: - # get the user's master key, to make sure it matches - # what was sent - stored_key, stored_key_id, _ = yield self._get_e2e_cross_signing_verify_key( - user, "master", user_id - ) - - # make sure that the user's master key is the one that - # was signed (and no others) - device_id = stored_key_id.split(":", 1)[1] - if device_id not in devicemap: - # set device to None so that the failure gets - # marked on all the signatures - device_id = None - logger.error( - "upload signature: wrong device: %s vs %s", - device, - devicemap, - ) - raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) - key = devicemap[device_id] - del devicemap[device_id] - if len(devicemap) > 0: - # other devices were signed -- mark those as failures - logger.error("upload signature: too many devices specified") - failure = _exception_to_failure( - SynapseError(404, "Unknown device", Codes.NOT_FOUND) - ) - failures[user] = { - device: failure for device in devicemap.keys() - } + raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) + key = devicemap[device_id] + other_devices = [k for k in devicemap.keys() if k != device_id] + if other_devices: + # other devices were signed -- mark those as failures + logger.error("upload signature: too many devices specified") + failure = _exception_to_failure( + SynapseError(404, "Unknown device", Codes.NOT_FOUND) + ) + failures[user] = { + device: failure for device in other_devices + } - if user_signing_key_id in stored_key.get("signatures", {}).get( - user_id, {} - ): - # we already have the signature, so we can skip it - continue + if user_signing_key_id in stored_key.get("signatures", {}).get( + user_id, {} + ): + # we already have the signature, so we can skip it + continue - _check_device_signature( - user_id, user_signing_verify_key, key, stored_key - ) + _check_device_signature( + user_id, user_signing_verify_key, key, stored_key + ) - signed_users.append(user) - signature = key["signatures"][user_id][user_signing_key_id] - signature_list.append( - (user_signing_key_id, user, device_id, signature) - ) - except SynapseError as e: - failure = _exception_to_failure(e) - if device_id is None: - failures[user] = { - device_id: failure for device_id 
in devicemap.keys() - } - else: - failures.setdefault(user, {})[device_id] = failure + signature = key["signatures"][user_id][user_signing_key_id] + signature_list.append( + (user_signing_key_id, user, device_id, signature) + ) except SynapseError as e: failure = _exception_to_failure(e) - for user, devicemap in signature.items(): + if device_id is None: failures[user] = { device_id: failure for device_id in devicemap.keys() } + else: + failures.setdefault(user, {})[device_id] = failure - # store the signature, and send the appropriate notifications for sync - logger.debug("upload signature failures: %r", failures) - yield self.store.store_e2e_cross_signing_signatures(user_id, signature_list) - - if len(self_device_ids): - yield self.device_handler.notify_device_update(user_id, self_device_ids) - if len(signed_users): - yield self.device_handler.notify_user_signature_update( - user_id, signed_users - ) - - return {"failures": failures} + return signature_list, failures @defer.inlineCallbacks def _get_e2e_cross_signing_verify_key(self, user_id, key_type, from_user_id=None): -- cgit 1.4.1 From 415d0a00e0845654b34542b9914ea01224dd8ed6 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Fri, 6 Sep 2019 16:46:45 -0400 Subject: run black --- synapse/handlers/e2e_keys.py | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 6500bf3e16..95f3cc891b 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -686,17 +686,13 @@ class E2eKeysHandler(object): devices = devices[user_id] except SynapseError as e: failures[user_id] = { - device: _exception_to_failure(e) - for device in signatures.keys() + device: _exception_to_failure(e) for device in signatures.keys() } return signature_list, failures for device_id, device in signatures.items(): try: - if ( - "signatures" not in device - or user_id not in device["signatures"] - ): + if "signatures" not in device or user_id not in device["signatures"]: # no signature was sent raise SynapseError( 400, "Invalid signature", Codes.INVALID_SIGNATURE @@ -725,9 +721,7 @@ class E2eKeysHandler(object): # signed by an unknown device, or the # device does not have the key raise SynapseError( - 400, - "Invalid signature", - Codes.INVALID_SIGNATURE, + 400, "Invalid signature", Codes.INVALID_SIGNATURE ) # get the key and check the signature @@ -760,9 +754,9 @@ class E2eKeysHandler(object): stored_device = devices[device_id]["keys"] except KeyError: raise SynapseError(404, "Unknown device", Codes.NOT_FOUND) - if self_signing_key_id in stored_device.get( - "signatures", {} - ).get(user_id, {}): + if self_signing_key_id in stored_device.get("signatures", {}).get( + user_id, {} + ): # we already have a signature on this device, so we # can skip it, since it should be exactly the same continue @@ -776,9 +770,7 @@ class E2eKeysHandler(object): (self_signing_key_id, user_id, device_id, signature) ) except SynapseError as e: - failures.setdefault(user_id, {})[ - device_id - ] = _exception_to_failure(e) + failures.setdefault(user_id, {})[device_id] = _exception_to_failure(e) return signature_list, failures @@ -799,9 +791,7 @@ class E2eKeysHandler(object): except SynapseError as e: failure = _exception_to_failure(e) for user, devicemap in signatures.items(): - failures[user] = { - device_id: failure for device_id in devicemap.keys() - } + failures[user] = {device_id: failure for device_id in devicemap.keys()} return signature_list, failures for 
user, devicemap in signatures.items(): @@ -833,9 +823,7 @@ class E2eKeysHandler(object): failure = _exception_to_failure( SynapseError(404, "Unknown device", Codes.NOT_FOUND) ) - failures[user] = { - device: failure for device in other_devices - } + failures[user] = {device: failure for device in other_devices} if user_signing_key_id in stored_key.get("signatures", {}).get( user_id, {} @@ -848,9 +836,7 @@ class E2eKeysHandler(object): ) signature = key["signatures"][user_id][user_signing_key_id] - signature_list.append( - (user_signing_key_id, user, device_id, signature) - ) + signature_list.append((user_signing_key_id, user, device_id, signature)) except SynapseError as e: failure = _exception_to_failure(e) if device_id is None: -- cgit 1.4.1 From ab729e31cfca4d1a958937bb576010271b9c8044 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Fri, 6 Sep 2019 17:52:37 -0400 Subject: use something that's the right type for user_id --- tests/handlers/test_e2e_keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 7a59ec5085..854eb6c024 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -266,7 +266,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): device_key_1["signatures"][local_user]["ed25519:abc"] = "base64+signature" device_key_2["signatures"][local_user]["ed25519:def"] = "base64+signature" devices = yield self.handler.query_devices( - {"device_keys": {local_user: []}}, 0, 0 + {"device_keys": {local_user: []}}, 0, local_user ) del devices["device_keys"][local_user]["abc"]["unsigned"] del devices["device_keys"][local_user]["def"]["unsigned"] -- cgit 1.4.1 From d3f2fbcfe577f42d0208d15a57bd66e56186742a Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Sat, 7 Sep 2019 14:13:18 -0400 Subject: add function docs --- synapse/handlers/e2e_keys.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 95f3cc891b..cca361b15b 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -659,6 +659,18 @@ class E2eKeysHandler(object): @defer.inlineCallbacks def _process_self_signatures(self, user_id, signatures): + """Process uploaded signatures of the user's own keys. + + Args: + user_id (string): the user uploading the keys + signatures (dict[string, dict]): map of devices to signed keys + + Returns: + (list[(string, string, string, string)], dict[string, dict[string, dict]]): + a list of signatures to upload, in the form (signing_key_id, target_user_id, + target_device_id, signature), and a map of users to devices to failure + reasons + """ signature_list = [] failures = {} if not signatures: @@ -776,8 +788,18 @@ class E2eKeysHandler(object): @defer.inlineCallbacks def _process_other_signatures(self, user_id, signatures): - # now check non-self signatures. These signatures will be signed - # by the user-signing key + """Process uploaded signatures of other users' keys. 
+ + Args: + user_id (string): the user uploading the keys + signatures (dict[string, dict]): map of users to devices to signed keys + + Returns: + (list[(string, string, string, string)], dict[string, dict[string, dict]]): + a list of signatures to upload, in the form (signing_key_id, target_user_id, + target_device_id, signature), and a map of users to devices to failure + reasons + """ signature_list = [] failures = {} if not signatures: -- cgit 1.4.1 From 05bae6b4fc97943b3738bac3175da1bc49f13512 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 10:13:14 +0100 Subject: Add opentracing span for HTTP push --- synapse/push/httppusher.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index bd5d53af91..6299587808 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -22,6 +22,7 @@ from prometheus_client import Counter from twisted.internet import defer from twisted.internet.error import AlreadyCalled, AlreadyCancelled +from synapse.logging import opentracing from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import PusherConfigException @@ -194,7 +195,17 @@ class HttpPusher(object): ) for push_action in unprocessed: - processed = yield self._process_one(push_action) + with opentracing.start_active_span( + "http-push", + tags={ + "authenticated_entity": self.user_id, + "event_id": push_action["event_id"], + "app_id": self.app_id, + "app_display_name": self.app_display_name, + }, + ): + processed = yield self._process_one(push_action) + if processed: http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC -- cgit 1.4.1 From a852e93408bf86a5acd939c58954621f653b56b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 10:14:51 +0100 Subject: Newsfile --- changelog.d/6003.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6003.misc diff --git a/changelog.d/6003.misc b/changelog.d/6003.misc new file mode 100644 index 0000000000..4152d05f87 --- /dev/null +++ b/changelog.d/6003.misc @@ -0,0 +1 @@ +Add opentracing span over HTTP push processing. -- cgit 1.4.1 From be618e055178f4aa9865ab426182218312bed07f Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 14:43:51 +0300 Subject: Only count real users when checking for auto-creation of auto-join room Previously if the first registered user was a "support" or "bot" user, when the first real user registers, the auto-join rooms were not created. Fix to exclude non-real (ie users with a special user type) users when counting how many users there are to determine whether we should auto-create a room. Signed-off-by: Jason Robinson --- changelog.d/6004.bugfix | 1 + synapse/handlers/register.py | 12 ++++-------- synapse/storage/registration.py | 39 +++++++++++++++++++++++++++++++++++++++ tests/handlers/test_register.py | 29 +++++++++++++++++++++++++++-- 4 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 changelog.d/6004.bugfix diff --git a/changelog.d/6004.bugfix b/changelog.d/6004.bugfix new file mode 100644 index 0000000000..45c179c8fd --- /dev/null +++ b/changelog.d/6004.bugfix @@ -0,0 +1 @@ +Only count real users when checking for auto-creation of auto-join room. 
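The predicate behind the fix below is the `user_type` column: support and bot accounts are registered with a non-null `user_type`, so real users are simply the rows where it is null (this commit first also accepts the empty string; a follow-up commit further down tightens it to a plain null check). A sketch of that semantics, using an in-memory SQLite table as a simplified stand-in for Synapse's `users` table:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE users (name TEXT, user_type TEXT)")
    conn.executemany(
        "INSERT INTO users VALUES (?, ?)",
        [("@support:test", "support"), ("@bot:test", "bot"), ("@alice:test", None)],
    )

    # Mirrors count_real_users(): special users carry a user_type, real users don't.
    (count,) = conn.execute(
        "SELECT COUNT(*) FROM users WHERE user_type IS NULL"
    ).fetchone()
    assert count == 1  # only @alice:test is a real user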
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 975da57ffd..06bd03b77c 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -275,16 +275,12 @@ class RegistrationHandler(BaseHandler): fake_requester = create_requester(user_id) # try to create the room if we're the first real user on the server. Note - # that an auto-generated support user is not a real user and will never be + # that an auto-generated support or bot user is not a real user and will never be # the user to create the room should_auto_create_rooms = False - is_support = yield self.store.is_support_user(user_id) - # There is an edge case where the first user is the support user, then - # the room is never created, though this seems unlikely and - # recoverable from given the support user being involved in the first - # place. - if self.hs.config.autocreate_auto_join_rooms and not is_support: - count = yield self.store.count_all_users() + is_real_user = yield self.store.is_real_user(user_id) + if self.hs.config.autocreate_auto_join_rooms and is_real_user: + count = yield self.store.count_real_users() should_auto_create_rooms = count == 1 for r in self.hs.config.auto_join_rooms: logger.info("Auto-joining %s to %s", user_id, r) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 5138792a5f..b054d86ae0 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -322,6 +322,21 @@ class RegistrationWorkerStore(SQLBaseStore): return None + @cachedInlineCallbacks() + def is_real_user(self, user_id): + """Determines if the user is a real user, ie does not have a 'user_type'. + + Args: + user_id (str): user id to test + + Returns: + Deferred[bool]: True if user 'user_type' is null or empty string + """ + res = yield self.runInteraction( + "is_real_user", self.is_real_user_txn, user_id + ) + return res + @cachedInlineCallbacks() def is_support_user(self, user_id): """Determines if the user is of type UserTypes.SUPPORT @@ -337,6 +352,16 @@ class RegistrationWorkerStore(SQLBaseStore): ) return res + def is_real_user_txn(self, txn, user_id): + res = self._simple_select_one_onecol_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + retcol="user_type", + allow_none=True, + ) + return True if res is None or res == "" else False + def is_support_user_txn(self, txn, user_id): res = self._simple_select_one_onecol_txn( txn=txn, @@ -421,6 +446,20 @@ class RegistrationWorkerStore(SQLBaseStore): ret = yield self.runInteraction("count_users", _count_users) return ret + @defer.inlineCallbacks + def count_real_users(self): + """Counts all users without a special user_type registered on the homeserver.""" + + def _count_users(txn): + txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''") + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_real_users", _count_users) + return ret + @defer.inlineCallbacks def find_next_generated_user_id_localpart(self): """ diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index e10296a5e4..1e9ba3a201 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -171,11 +171,11 @@ class RegistrationTestCase(unittest.HomeserverTestCase): rooms = self.get_success(self.store.get_rooms_for_user(user_id)) self.assertEqual(len(rooms), 0) - def test_auto_create_auto_join_rooms_when_support_user_exists(self): + def 
test_auto_create_auto_join_rooms_when_user_is_not_a_real_user(self): room_alias_str = "#room:test" self.hs.config.auto_join_rooms = [room_alias_str] - self.store.is_support_user = Mock(return_value=True) + self.store.is_real_user = Mock(return_value=False) user_id = self.get_success(self.handler.register_user(localpart="support")) rooms = self.get_success(self.store.get_rooms_for_user(user_id)) self.assertEqual(len(rooms), 0) @@ -183,6 +183,31 @@ class RegistrationTestCase(unittest.HomeserverTestCase): room_alias = RoomAlias.from_string(room_alias_str) self.get_failure(directory_handler.get_association(room_alias), SynapseError) + def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.count_real_users = Mock(return_value=1) + self.store.is_real_user = Mock(return_value=True) + user_id = self.get_success(self.handler.register_user(localpart="real")) + rooms = self.get_success(self.store.get_rooms_for_user(user_id)) + directory_handler = self.hs.get_handlers().directory_handler + room_alias = RoomAlias.from_string(room_alias_str) + room_id = self.get_success(directory_handler.get_association(room_alias)) + + self.assertTrue(room_id["room_id"] in rooms) + self.assertEqual(len(rooms), 1) + + def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.count_real_users = Mock(return_value=2) + self.store.is_real_user = Mock(return_value=True) + user_id = self.get_success(self.handler.register_user(localpart="real")) + rooms = self.get_success(self.store.get_rooms_for_user(user_id)) + self.assertEqual(len(rooms), 0) + def test_auto_create_auto_join_where_no_consent(self): """Test to ensure that the first user is not auto-joined to a room if they have not given general consent. 
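Taken together, these registration tests pin down the new auto-create rule: the auto-join rooms are only created when the registering user is a real user and is the first real user on the server. A condensed, synchronous restatement of the handler logic above (in Synapse the store methods actually return deferreds):

    def should_auto_create_rooms(store, config, user_id):
        if not config.autocreate_auto_join_rooms:
            return False
        # Support/bot users never trigger creation of the auto-join rooms...
        if not store.is_real_user(user_id):
            return False
        # ...and among real users, only the very first to register does.
        return store.count_real_users() == 1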
-- cgit 1.4.1 From 62fac9d969cea98694093a5f80bed6bdd4848968 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 14:59:35 +0300 Subject: Auto-fix a few code style issues Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index b054d86ae0..9387b29503 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -332,9 +332,7 @@ class RegistrationWorkerStore(SQLBaseStore): Returns: Deferred[bool]: True if user 'user_type' is null or empty string """ - res = yield self.runInteraction( - "is_real_user", self.is_real_user_txn, user_id - ) + res = yield self.runInteraction("is_real_user", self.is_real_user_txn, user_id) return res @cachedInlineCallbacks() @@ -451,7 +449,9 @@ class RegistrationWorkerStore(SQLBaseStore): """Counts all users without a special user_type registered on the homeserver.""" def _count_users(txn): - txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''") + txn.execute( + "SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''" + ) rows = self.cursor_to_dict(txn) if rows: return rows[0]["users"] -- cgit 1.4.1 From 80e14a8546efb9e2f9edec3b1de0a8b943351252 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:23:41 +0100 Subject: Handle setting retention period to 0 --- synapse/config/server.py | 2 +- synapse/storage/events.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index 8efab924d4..aa71835dc3 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -165,7 +165,7 @@ class ServerConfig(Config): # How long to keep redacted events in the database in unredacted form # before redacting them. 
redaction_retention_period = config.get("redaction_retention_period") - if redaction_retention_period: + if redaction_retention_period is not None: self.redaction_retention_period = self.parse_duration( redaction_retention_period ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a5d13ddc49..77ba7eb2af 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1566,7 +1566,7 @@ class EventsStore( Deferred """ - if not self.hs.config.redaction_retention_period: + if self.hs.config.redaction_retention_period is None: return max_pos = yield self.find_first_stream_ordering_after_ts( -- cgit 1.4.1 From fffe17b77d06927aaf64fa80be5b765c870a4ef5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:24:24 +0100 Subject: Don't start looping call unless enabled --- synapse/storage/events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 77ba7eb2af..9ef7aefd97 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -268,7 +268,8 @@ class EventsStore( "_censor_redactions", self._censor_redactions ) - hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) + if self.hs.config.redaction_retention_period is not None: + hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) @defer.inlineCallbacks def _read_forward_extremities(self): -- cgit 1.4.1 From 916c69722833dd94c53d0fedeec8cc42d2085e73 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:31:00 +0100 Subject: Fixup comment --- synapse/storage/events.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 9ef7aefd97..4484ae7ce0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -269,7 +269,7 @@ class EventsStore( ) if self.hs.config.redaction_retention_period is not None: - hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000) + hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000) @defer.inlineCallbacks def _read_forward_extremities(self): @@ -1574,9 +1574,17 @@ class EventsStore( self._clock.time_msec() - self.hs.config.redaction_retention_period ) - # We fetch all redactions that point to an event that we have that has - # a stream ordering from over a month ago, that we haven't yet censored - # in the DB. + # We fetch all redactions that: + # 1. point to an event we have that has, + # 2. has a stream ordering from before the cut off, and + # 3. we haven't yet censored. + # + # This is limited to 100 events to ensure that we don't try and do too + # much at once. We'll get called again so this should eventually catch + # up. + # + # We use the range [-max_pos, max_pos] to handle backfilled events, + # which are given negative stream ordering. sql = """ SELECT er.event_id, redacts FROM redactions INNER JOIN events AS er USING (event_id) -- cgit 1.4.1 From e7184a437062ae21846b8e071ded73526209e90c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:33:38 +0100 Subject: Use better names in SQL --- synapse/storage/events.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 4484ae7ce0..0da6e0b1a1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1586,12 +1586,15 @@ class EventsStore( # We use the range [-max_pos, max_pos] to handle backfilled events, # which are given negative stream ordering. 
sql = """ - SELECT er.event_id, redacts FROM redactions - INNER JOIN events AS er USING (event_id) - INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id) + SELECT redact_event.event_id, redacts FROM redactions + INNER JOIN events AS redact_event USING (event_id) + INNER JOIN events AS original_event ON ( + redact_event.room_id = original_event.room_id + AND redacts = original_event.event_id + ) WHERE NOT have_censored - AND ? <= er.stream_ordering AND er.stream_ordering <= ? - ORDER BY er.stream_ordering ASC + AND ? <= redact_event.stream_ordering AND redact_event.stream_ordering <= ? + ORDER BY redact_event.stream_ordering ASC LIMIT ? """ -- cgit 1.4.1 From 8b9ade8c7871c862cf2122a156f00e411cd7a276 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 13:40:05 +0100 Subject: Default to censoring redactions after seven days --- docs/sample_config.yaml | 8 +++++--- synapse/config/server.py | 10 ++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index e23b80d2b8..24adc3da2f 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -306,10 +306,12 @@ listeners: # #allow_per_room_profiles: false -# How long to keep redacted events in unredacted form in the database. -# By default redactions are kept indefinitely. +# How long to keep redacted events in unredacted form in the database. After +# this period redacted events get replaced with their redacted form in the DB. # -#redaction_retention_period: 30d +# Defaults to `7d`. Set to `null` to disable. +# +redaction_retention_period: 7d ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index aa71835dc3..c8b9fe2d0f 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -164,7 +164,7 @@ class ServerConfig(Config): # How long to keep redacted events in the database in unredacted form # before redacting them. - redaction_retention_period = config.get("redaction_retention_period") + redaction_retention_period = config.get("redaction_retention_period", "7d") if redaction_retention_period is not None: self.redaction_retention_period = self.parse_duration( redaction_retention_period @@ -729,10 +729,12 @@ class ServerConfig(Config): # #allow_per_room_profiles: false - # How long to keep redacted events in unredacted form in the database. - # By default redactions are kept indefinitely. + # How long to keep redacted events in unredacted form in the database. After + # this period redacted events get replaced with their redacted form in the DB. # - #redaction_retention_period: 30d + # Defaults to `7d`. Set to `null` to disable. 
+ # + redaction_retention_period: 7d """ % locals() ) -- cgit 1.4.1 From 8c03cd0e5f73fb59ee773dc6cce77f2dc4dab827 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 16:40:40 +0300 Subject: Simplify is_real_user_txn check to trust user_type is null if real user Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9387b29503..54b0846c54 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -358,7 +358,7 @@ class RegistrationWorkerStore(SQLBaseStore): retcol="user_type", allow_none=True, ) - return True if res is None or res == "" else False + return res is None def is_support_user_txn(self, txn, user_id): res = self._simple_select_one_onecol_txn( -- cgit 1.4.1 From e89fea4f04c6fc7df41c5cade63609b513a98073 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 16:43:32 +0300 Subject: Simplify count_real_users SQL to only count user_type is null rows Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 54b0846c54..c0ca25733b 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -450,7 +450,7 @@ class RegistrationWorkerStore(SQLBaseStore): def _count_users(txn): txn.execute( - "SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''" + "SELECT COUNT(*) AS users FROM users where user_type is null" ) rows = self.cursor_to_dict(txn) if rows: -- cgit 1.4.1 From 580f3df9b2573c0278dd952d1478689e5cd23a7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Sep 2019 15:08:24 +0100 Subject: Fix comments --- synapse/storage/events.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 0da6e0b1a1..ddf7ab6479 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1559,7 +1559,8 @@ class EventsStore( @defer.inlineCallbacks def _censor_redactions(self): - """Censors all redactions older than a month that haven't been censored. + """Censors all redactions older than the configured period that haven't + been censored yet. By censor we mean update the event_json table with the redacted event. @@ -1575,7 +1576,7 @@ class EventsStore( ) # We fetch all redactions that: - # 1. point to an event we have that has, + # 1. point to an event we have, # 2. has a stream ordering from before the cut off, and # 3. we haven't yet censored. 
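On the two registration simplifications a little above (`is_real_user_txn` and `count_real_users`): both lean on the invariant that real users are stored with a NULL `user_type`, and SQL's NULL never compares equal to '', so the empty-string clause adds nothing as long as the homeserver never writes an empty string there. A self-contained illustration, with an in-memory SQLite table standing in for the real schema:

    import sqlite3

    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE users (name TEXT, user_type TEXT)")
    db.execute("INSERT INTO users VALUES ('@real:hs', NULL)")
    db.execute("INSERT INTO users VALUES ('@helper:hs', 'support')")

    # Counts only the real user; under the NULL-means-real invariant the old
    # "or user_type = ''" clause could never match anything extra.
    count = db.execute(
        "SELECT COUNT(*) AS users FROM users WHERE user_type IS NULL"
    ).fetchone()[0]
    print(count)  # 1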
# -- cgit 1.4.1 From aaed6b39e140195a0f2b48e4de0519e08f16a119 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 9 Sep 2019 17:10:02 +0300 Subject: Fix code style, again Signed-off-by: Jason Robinson --- synapse/storage/registration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index c0ca25733b..109052fa41 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -449,9 +449,7 @@ class RegistrationWorkerStore(SQLBaseStore): """Counts all users without a special user_type registered on the homeserver.""" def _count_users(txn): - txn.execute( - "SELECT COUNT(*) AS users FROM users where user_type is null" - ) + txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null") rows = self.cursor_to_dict(txn) if rows: return rows[0]["users"] -- cgit 1.4.1 From aeb9b2179eaa4b468bec937570d3ac7de7ccaaea Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 10 Sep 2019 00:14:58 +1000 Subject: Add a build info metric to Prometheus (#6005) --- changelog.d/6005.feature | 1 + synapse/metrics/__init__.py | 12 ++++++++++++ tests/test_metrics.py | 22 ++++++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6005.feature diff --git a/changelog.d/6005.feature b/changelog.d/6005.feature new file mode 100644 index 0000000000..ed6491d3e4 --- /dev/null +++ b/changelog.d/6005.feature @@ -0,0 +1 @@ +The new Prometheus metric `synapse_build_info` exposes the Python version, OS version, and Synapse version of the running server. diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 488280b4a6..b5c9595cb9 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -29,11 +29,13 @@ from prometheus_client.core import REGISTRY, GaugeMetricFamily, HistogramMetricF from twisted.internet import reactor +import synapse from synapse.metrics._exposition import ( MetricsResource, generate_latest, start_http_server, ) +from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) @@ -385,6 +387,16 @@ event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name" # finished being processed. event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"]) +# Build info of the running server. +build_info = Gauge( + "synapse_build_info", "Build information", ["pythonversion", "version", "osversion"] +) +build_info.labels( + " ".join([platform.python_implementation(), platform.python_version()]), + get_version_string(synapse), + " ".join([platform.system(), platform.release()]), +).set(1) + last_ticked = time.time() diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 2edbae5c6d..270f853d60 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd +# Copyright 2019 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
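The `synapse_build_info` gauge registered in synapse/metrics/__init__.py above follows a standard Prometheus idiom: a constant gauge set to 1 whose labels carry the interesting values. A minimal standalone sketch of the same pattern with prometheus_client (hypothetical metric name; a private registry keeps the example self-contained):

    import platform

    from prometheus_client import CollectorRegistry, Gauge, generate_latest

    registry = CollectorRegistry()
    build_info = Gauge(
        "example_build_info",
        "Build information",
        ["pythonversion", "osversion"],
        registry=registry,
    )
    build_info.labels(platform.python_version(), platform.system()).set(1)

    # Each label combination is exported as a single sample with value 1
    print(generate_latest(registry).decode())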
- -from synapse.metrics import InFlightGauge +from synapse.metrics import REGISTRY, InFlightGauge, generate_latest from tests import unittest @@ -111,3 +111,21 @@ class TestMauLimit(unittest.TestCase): } return results + + +class BuildInfoTests(unittest.TestCase): + def test_get_build(self): + """ + The synapse_build_info metric reports the OS version, Python version, + and Synapse version. + """ + items = list( + filter( + lambda x: b"synapse_build_info{" in x, + generate_latest(REGISTRY).split(b"\n"), + ) + ) + self.assertEqual(len(items), 1) + self.assertTrue(b"osversion=" in items[0]) + self.assertTrue(b"pythonversion=" in items[0]) + self.assertTrue(b"version=" in items[0]) -- cgit 1.4.1 From 60d3c57bd0c977cbe6b7585a2c1517cc4e2c16dd Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 10 Sep 2019 15:57:32 +0100 Subject: Use account_threepid_delegate for 3pid validation --- synapse/handlers/auth.py | 11 ++++- synapse/handlers/identity.py | 73 +++++++++++++++------------------ synapse/rest/client/v2_alpha/account.py | 3 +- 3 files changed, 45 insertions(+), 42 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index d0c0142740..374372b69e 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -444,7 +444,16 @@ class AuthHandler(BaseHandler): logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - threepid = yield identity_handler.threepid_from_creds(threepid_creds) + if medium == "email": + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + elif medium == "msisdn": + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + else: + raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,)) elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: row = yield self.store.get_threepid_validation_session( medium, diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 71b5a87392..2dfb79fde1 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -75,59 +75,52 @@ class IdentityHandler(BaseHandler): return client_secret, id_server, id_access_token @defer.inlineCallbacks - def threepid_from_creds(self, creds, use_v2=True): + def threepid_from_creds(self, id_server, creds): """ - Retrieve and validate a threepid identitier from a "credentials" dictionary + Retrieve and validate a threepid identifier from a "credentials" dictionary against a + given identity server Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: + id_server (str|None): The identity server to validate 3PIDs against. If None, + we will attempt to extract id_server creds + + creds (dict[str, str]): Dictionary containing the following key: + * id_server: An optional domain name of an identity server * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. 
Required if use_v2 is true - use_v2 (bool): Whether to use v2 Identity Service API endpoints + * sid: The ID of the validation session Returns: Deferred[dict[str,str|int]|None]: A dictionary consisting of response params to the /getValidated3pid endpoint of the Identity Service API, or None if the threepid was not found """ - client_secret, id_server, id_access_token = self._extract_items_from_creds_dict( - creds - ) - - # If an id_access_token is not supplied, force usage of v1 - if id_access_token is None: - use_v2 = False - - query_params = {"sid": creds["sid"], "client_secret": client_secret} - - # Decide which API endpoint URLs and query parameters to use - if use_v2: - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/v2/3pid/getValidated3pid", + client_secret = creds.get("client_secret") or creds.get("clientSecret") + if not client_secret: + raise SynapseError( + 400, "Missing param client_secret in creds", errcode=Codes.MISSING_PARAM ) - query_params["id_access_token"] = id_access_token - else: - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/api/v1/3pid/getValidated3pid", + session_id = creds.get("sid") + if not session_id: + raise SynapseError( + 400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM ) + if not id_server: + # Attempt to get the id_server from the creds dict + id_server = creds.get("id_server") + if not id_server: + raise SynapseError( + 400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM + ) + + query_params = {"sid": session_id, "client_secret": client_secret} + + url = "https://%s%s" % ( + id_server, + "/_matrix/identity/api/v1/3pid/getValidated3pid", + ) - try: - data = yield self.http_client.get_json(url, query_params) - return data if "medium" in data else None - except HttpResponseException as e: - if e.code != 404 or not use_v2: - # Generic failure - logger.info("getValidated3pid failed with Matrix error: %r", e) - raise e.to_synapse_error() - - # This identity server is too old to understand Identity Service API v2 - # Attempt v1 endpoint - logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", url) - return (yield self.threepid_from_creds(creds, use_v2=False)) + data = yield self.http_client.get_json(url, query_params) + return data if "medium" in data else None @defer.inlineCallbacks def bind_threepid(self, creds, mxid, use_v2=True): diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 785d01ea52..94a8fec8f7 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -523,7 +523,8 @@ class ThreepidRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - threepid = yield self.identity_handler.threepid_from_creds(threepid_creds) + # Retrieve the identity server from the request + threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds) if not threepid: raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED) -- cgit 1.4.1 From b5833a2abf788a4144602c3e0de15d371608094b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 10 Sep 2019 17:43:57 +0100 Subject: Add changelog --- changelog.d/6011.feature | 1 + synapse/handlers/identity.py | 6 +++--- synapse/rest/client/v2_alpha/account.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6011.feature diff --git a/changelog.d/6011.feature b/changelog.d/6011.feature new file mode 100644 index 0000000000..ad16acb12b --- 
/dev/null +++ b/changelog.d/6011.feature @@ -0,0 +1 @@ +Use account_threepid_delegate.email and account_threepid_delegate.msisdn for validating threepid sessions. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 2dfb79fde1..f6d1d1717e 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -84,8 +84,8 @@ class IdentityHandler(BaseHandler): id_server (str|None): The identity server to validate 3PIDs against. If None, we will attempt to extract id_server creds - creds (dict[str, str]): Dictionary containing the following key: - * id_server: An optional domain name of an identity server + creds (dict[str, str]): Dictionary containing the following keys: + * id_server|idServer: An optional domain name of an identity server * client_secret|clientSecret: A unique secret str provided by the client * sid: The ID of the validation session @@ -106,7 +106,7 @@ class IdentityHandler(BaseHandler): ) if not id_server: # Attempt to get the id_server from the creds dict - id_server = creds.get("id_server") + id_server = creds.get("id_server") or creds.get("idServer") if not id_server: raise SynapseError( 400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 94a8fec8f7..2ea515d2f6 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -523,7 +523,7 @@ class ThreepidRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - # Retrieve the identity server from the request + # Specify None as the identity server to retrieve it from the request body instead threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds) if not threepid: -- cgit 1.4.1 From cd17a2085eb517d24c68e33cd3906375a8baeb3b Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 10:37:17 +0100 Subject: Remove origin parameter from add_display_name_to_third_party_invite and add params to docstring (#6010) Another small fixup noticed during work on a larger PR. The `origin` field of `add_display_name_to_third_party_invite` is not used and likely was just carried over from the `on_PUT` method of `FederationThirdPartyInviteExchangeServlet` which, like all other servlets, provides an `origin` argument. Since it's not used anywhere in the handler function though, we should remove it from the function arguments. --- changelog.d/6010.misc | 1 + synapse/federation/federation_server.py | 4 ++-- synapse/federation/transport/server.py | 2 +- synapse/handlers/federation.py | 7 ++++++- 4 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6010.misc diff --git a/changelog.d/6010.misc b/changelog.d/6010.misc new file mode 100644 index 0000000000..0659f12ebd --- /dev/null +++ b/changelog.d/6010.misc @@ -0,0 +1 @@ +Remove unused `origin` argument on FederationHandler.add_display_name_to_third_party_invite. 
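Returning to the delegate-based validation a few commits above: with `account_threepid_delegate` configured, checking a validation session reduces to a single v1 `getValidated3pid` query. A hedged sketch of that request with illustrative values (the real handler goes through Synapse's SimpleHttpClient rather than `requests`):

    import requests

    # e.g. the configured account_threepid_delegate_email (illustrative)
    id_server = "id.example.com"

    data = requests.get(
        "https://%s/_matrix/identity/api/v1/3pid/getValidated3pid" % (id_server,),
        params={"sid": "1234", "client_secret": "some_secret"},
    ).json()

    # As in threepid_from_creds: a validated session includes a "medium" key
    threepid = data if "medium" in data else None
    print(threepid)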
\ No newline at end of file diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e5f0b90aec..da06ab379d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -669,9 +669,9 @@ class FederationServer(FederationBase): return ret @defer.inlineCallbacks - def on_exchange_third_party_invite_request(self, origin, room_id, event_dict): + def on_exchange_third_party_invite_request(self, room_id, event_dict): ret = yield self.handler.on_exchange_third_party_invite_request( - origin, room_id, event_dict + room_id, event_dict ) return ret diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 132a8fb5e6..7dc696c7ae 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -575,7 +575,7 @@ class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): async def on_PUT(self, origin, content, query, room_id): content = await self.handler.on_exchange_third_party_invite_request( - origin, room_id, content + room_id, content ) return 200, content diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 538b16efd6..f72b81d419 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2530,12 +2530,17 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks @log_function - def on_exchange_third_party_invite_request(self, origin, room_id, event_dict): + def on_exchange_third_party_invite_request(self, room_id, event_dict): """Handle an exchange_third_party_invite request from a remote server The remote server will call this when it wants to turn a 3pid invite into a normal m.room.member invite. + Args: + room_id (str): The ID of the room. + + event_dict (dict[str, Any]): Dictionary containing the event body. + Returns: Deferred: resolves (to None) """ -- cgit 1.4.1 From 54ce81c86d163b883df67b97540426759a9f6363 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 10:46:38 +0100 Subject: Allow use of different ratelimits for admin redactions. This is useful to allow room admins to quickly deal with a large number of abusive messages. --- synapse/config/ratelimiting.py | 13 +++++++++++++ synapse/handlers/_base.py | 43 +++++++++++++++++++++++++++++++----------- synapse/handlers/message.py | 8 +++++++- synapse/server.py | 4 ++++ 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 33f31cf213..b4df6612d6 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -80,6 +80,12 @@ class RatelimitConfig(Config): "federation_rr_transactions_per_room_per_second", 50 ) + rc_admin_redaction = config.get("rc_admin_redaction") + if rc_admin_redaction: + self.rc_admin_redaction = RateLimitConfig(rc_admin_redaction) + else: + self.rc_admin_redaction = None + def generate_config_section(self, **kwargs): return """\ ## Ratelimiting ## @@ -102,6 +108,9 @@ class RatelimitConfig(Config): # - one for login that ratelimits login requests based on the account the # client is attempting to log into, based on the amount of failed login # attempts for this account. + # - one for ratelimiting redactions by room admins. If this is not explicitly + # set then it uses the same ratelimiting as per rc_message. This is useful + # to allow room admins to quickly deal with abuse quickly. # # The defaults are as shown below. 
# @@ -123,6 +132,10 @@ class RatelimitConfig(Config): # failed_attempts: # per_second: 0.17 # burst_count: 3 + # + #rc_admin_redaction: + # per_second: 1 + # burst_count: 50 # Ratelimiting settings for incoming federation diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index c29c78bd65..853b72d8e7 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -45,6 +45,7 @@ class BaseHandler(object): self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() self.ratelimiter = hs.get_ratelimiter() + self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter() self.clock = hs.get_clock() self.hs = hs @@ -53,7 +54,7 @@ class BaseHandler(object): self.event_builder_factory = hs.get_event_builder_factory() @defer.inlineCallbacks - def ratelimit(self, requester, update=True): + def ratelimit(self, requester, update=True, is_admin_redaction=False): """Ratelimits requests. Args: @@ -62,6 +63,9 @@ class BaseHandler(object): Set to False when doing multiple checks for one request (e.g. to check up front if we would reject the request), and set to True for the last call for a given request. + is_admin_redaction (bool): Whether this is a room admin/moderator + redacting an event. If so then we may apply different + ratelimits depending on config. Raises: LimitExceededError if the request should be ratelimited @@ -90,16 +94,33 @@ class BaseHandler(object): messages_per_second = override.messages_per_second burst_count = override.burst_count else: - messages_per_second = self.hs.config.rc_message.per_second - burst_count = self.hs.config.rc_message.burst_count - - allowed, time_allowed = self.ratelimiter.can_do_action( - user_id, - time_now, - rate_hz=messages_per_second, - burst_count=burst_count, - update=update, - ) + # We default to different values if this is an admin redaction and + # the config is set + if is_admin_redaction and self.hs.config.rc_admin_redaction: + messages_per_second = self.hs.config.rc_admin_redaction.per_second + burst_count = self.hs.config.rc_admin_redaction.burst_count + else: + messages_per_second = self.hs.config.rc_message.per_second + burst_count = self.hs.config.rc_message.burst_count + + if is_admin_redaction and self.hs.config.rc_admin_redaction: + # If we have separate config for admin redactions we use a separate + # ratelimiter. 
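For intuition about the two knobs used just below: `per_second` is the steady-state refill rate and `burst_count` is the bucket size, so a moderator can fire `burst_count` redactions at once and then sustain `per_second`. A simplified standalone model of what a `can_do_action` check does (a toy leaky bucket, not Synapse's exact implementation):

    class ToyRatelimiter:
        def __init__(self):
            self.buckets = {}  # key -> (action_count, last_update_time)

        def can_do_action(self, key, time_now_s, rate_hz, burst_count):
            count, last = self.buckets.get(key, (0.0, time_now_s))
            # Leak actions out of the bucket at rate_hz per second
            count = max(0.0, count - (time_now_s - last) * rate_hz)
            if count + 1.0 > burst_count:
                return False
            self.buckets[key] = (count + 1.0, time_now_s)
            return True

    limiter = ToyRatelimiter()
    # With the suggested rc_admin_redaction values (1/s, burst 50):
    print(all(limiter.can_do_action("@mod:hs", 0, 1.0, 50) for _ in range(50)))
    print(limiter.can_do_action("@mod:hs", 0, 1.0, 50))  # the 51st is denied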
+ allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action( + user_id, + time_now, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) + else: + allowed, time_allowed = self.ratelimiter.can_do_action( + user_id, + time_now, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) if not allowed: raise LimitExceededError( retry_after_ms=int(1000 * (time_allowed - time_now)) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 111f7c7e2f..184170ef8b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -729,7 +729,13 @@ class EventCreationHandler(object): assert not self.config.worker_app if ratelimit: - yield self.base_handler.ratelimit(requester) + is_admin_redaction = ( + event.type == EventTypes.Redaction + and event.sender != requester.user.to_string() + ) + yield self.base_handler.ratelimit( + requester, is_admin_redaction=is_admin_redaction + ) yield self.base_handler.maybe_kick_guest_users(event, context) diff --git a/synapse/server.py b/synapse/server.py index 9e28dba2b1..1fcc7375d3 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -221,6 +221,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.admin_redaction_ratelimiter = Ratelimiter() self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -279,6 +280,9 @@ class HomeServer(object): def get_registration_ratelimiter(self): return self.registration_ratelimiter + def get_admin_redaction_ratelimiter(self): + return self.admin_redaction_ratelimiter + def build_federation_client(self): return FederationClient(self) -- cgit 1.4.1 From 2434c0084b670f54eb530b3ab6fad26aea6a075d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 10:48:52 +0100 Subject: Newsfile --- changelog.d/6015.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6015.feature diff --git a/changelog.d/6015.feature b/changelog.d/6015.feature new file mode 100644 index 0000000000..42aaffced9 --- /dev/null +++ b/changelog.d/6015.feature @@ -0,0 +1 @@ +Add config option to increase ratelimits for room admins redacting messages. -- cgit 1.4.1 From 8df88b5ff37641a07a1e75f0a08d3744c5140452 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 10:58:26 +0100 Subject: Update sample config --- docs/sample_config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 9b1ae58a27..ce657921b1 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -518,6 +518,9 @@ log_config: "CONFDIR/SERVERNAME.log.config" # - one for login that ratelimits login requests based on the account the # client is attempting to log into, based on the amount of failed login # attempts for this account. +# - one for ratelimiting redactions by room admins. If this is not explicitly +# set then it uses the same ratelimiting as per rc_message. This is useful +# to allow room admins to quickly deal with abuse quickly. # # The defaults are as shown below. 
# @@ -539,6 +542,10 @@ log_config: "CONFDIR/SERVERNAME.log.config" # failed_attempts: # per_second: 0.17 # burst_count: 3 +# +#rc_admin_redaction: +# per_second: 1 +# burst_count: 50 # Ratelimiting settings for incoming federation -- cgit 1.4.1 From c64c3bb4c5b740e3f505708bc5dde0b5b29de6b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 11:16:17 +0100 Subject: Fix how we check for self redaction --- synapse/handlers/message.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 184170ef8b..f975909416 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -729,10 +729,24 @@ class EventCreationHandler(object): assert not self.config.worker_app if ratelimit: - is_admin_redaction = ( - event.type == EventTypes.Redaction - and event.sender != requester.user.to_string() - ) + # We check if this is a room admin redacting an event so that we + # can apply different ratelimiting. We do this by simply checking + # its not a self-redaction (to avoid having to look up whether the + # user is actually admin or not). + is_admin_redaction = False + if event.type == EventTypes.Redaction: + original_event = yield self.store.get_event( + event.redacts, + check_redacted=False, + get_prev_content=False, + allow_rejected=False, + allow_none=True, + ) + + is_admin_redaction = ( + original_event and event.sender != original_event.sender + ) + yield self.base_handler.ratelimit( requester, is_admin_redaction=is_admin_redaction ) -- cgit 1.4.1 From caa9d6fed719a8a80eb4a998d32f09577d04f927 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 11:16:23 +0100 Subject: Add test for admin redaction ratelimiting. --- tests/rest/client/test_redactions.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py index fe66e397c4..1b1e991c42 100644 --- a/tests/rest/client/test_redactions.py +++ b/tests/rest/client/test_redactions.py @@ -30,6 +30,14 @@ class RedactionsTestCase(HomeserverTestCase): sync.register_servlets, ] + def make_homeserver(self, reactor, clock): + config = self.default_config() + + config["rc_message"] = {"per_second": 0.2, "burst_count": 10} + config["rc_admin_redaction"] = {"per_second": 1, "burst_count": 100} + + return self.setup_test_homeserver(config=config) + def prepare(self, reactor, clock, hs): # register a couple of users self.mod_user_id = self.register_user("user1", "pass") @@ -177,3 +185,20 @@ class RedactionsTestCase(HomeserverTestCase): self._redact_event( self.other_access_token, self.room_id, create_event_id, expect_code=403 ) + + def test_redact_event_as_moderator_ratelimit(self): + """Tests that the correct ratelimiting is applied to redactions + """ + + message_ids = [] + # as a regular user, send messages to redact + for _ in range(20): + b = self.helper.send(room_id=self.room_id, tok=self.other_access_token) + message_ids.append(b["event_id"]) + self.reactor.advance(10) # To get around ratelimits + + # as the moderator, send a bunch of redactions redaction + for msg_id in message_ids: + # These should all succeed, even though this would be denied by + # standard message ratelimiter + self._redact_event(self.mod_access_token, self.room_id, msg_id) -- cgit 1.4.1 From 3505ffcda7d04a9c0100ff423a2239d1e6340fd0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 11:59:45 
+0100 Subject: Fix existing v2 identity server calls (MSC2140) (#6013) Two things I missed while implementing [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140/files#diff-c03a26de5ac40fb532de19cb7fc2aaf7R80). 1. Access tokens should be provided to the identity server as `access_token`, not `id_access_token`, even though the homeserver may accept the tokens as `id_access_token`. 2. Access tokens must be sent to the identity server in a query parameter, the JSON body is not allowed. We now send the access token as part of an `Authorization: ...` header, which fixes both things. The breaking code was added in https://github.com/matrix-org/synapse/pull/5892 Sytest PR: https://github.com/matrix-org/sytest/pull/697 --- changelog.d/6013.misc | 1 + synapse/handlers/identity.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6013.misc diff --git a/changelog.d/6013.misc b/changelog.d/6013.misc new file mode 100644 index 0000000000..939fe8c655 --- /dev/null +++ b/changelog.d/6013.misc @@ -0,0 +1 @@ +Compatibility with v2 Identity Service APIs other than /lookup. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index f0549666c3..f690fd04a3 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -74,6 +74,25 @@ class IdentityHandler(BaseHandler): id_access_token = creds.get("id_access_token") return client_secret, id_server, id_access_token + def create_id_access_token_header(self, id_access_token): + """Create an Authorization header for passing to SimpleHttpClient as the header value + of an HTTP request. + + Args: + id_access_token (str): An identity server access token. + + Returns: + list[str]: The ascii-encoded bearer token encased in a list. 
+ """ + # Prefix with Bearer + bearer_token = "Bearer %s" % id_access_token + + # Encode headers to standard ascii + bearer_token.encode("ascii") + + # Return as a list as that's how SimpleHttpClient takes header values + return [bearer_token] + @defer.inlineCallbacks def threepid_from_creds(self, id_server, creds): """ @@ -155,15 +174,20 @@ class IdentityHandler(BaseHandler): use_v2 = False # Decide which API endpoint URLs to use + headers = {} bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - bind_data["id_access_token"] = id_access_token + headers["Authorization"] = self.create_id_access_token_header( + id_access_token + ) else: bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) try: - data = yield self.http_client.post_json_get_json(bind_url, bind_data) + data = yield self.http_client.post_json_get_json( + bind_url, bind_data, headers=headers + ) logger.debug("bound threepid %r to %s", creds, mxid) # Remember where we bound the threepid -- cgit 1.4.1 From 57dd41a45b4df5d736e2f30d40926b60f367b500 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 13:54:50 +0100 Subject: Fix comments Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/config/ratelimiting.py | 2 +- synapse/handlers/_base.py | 2 +- synapse/handlers/message.py | 2 +- tests/rest/client/test_redactions.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index b4df6612d6..587e2862b7 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -110,7 +110,7 @@ class RatelimitConfig(Config): # attempts for this account. # - one for ratelimiting redactions by room admins. If this is not explicitly # set then it uses the same ratelimiting as per rc_message. This is useful - # to allow room admins to quickly deal with abuse quickly. + # to allow room admins to deal with abuse quickly. # # The defaults are as shown below. # diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 853b72d8e7..d15c6282fb 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -105,7 +105,7 @@ class BaseHandler(object): if is_admin_redaction and self.hs.config.rc_admin_redaction: # If we have separate config for admin redactions we use a separate - # ratelimiter. + # ratelimiter allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action( user_id, time_now, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f975909416..1f8272784e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -731,7 +731,7 @@ class EventCreationHandler(object): if ratelimit: # We check if this is a room admin redacting an event so that we # can apply different ratelimiting. We do this by simply checking - # its not a self-redaction (to avoid having to look up whether the + # it's not a self-redaction (to avoid having to look up whether the # user is actually admin or not). 
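Concretely, the proxy described in the comment above works because redacting someone else's event requires power in the room, while redacting your own never does. A tiny standalone illustration of the comparison (plain dicts standing in for persisted events):

    original_event = {"event_id": "$orig", "sender": "@alice:hs"}
    redaction = {"type": "m.room.redaction", "sender": "@mod:hs", "redacts": "$orig"}

    is_admin_redaction = (
        redaction["type"] == "m.room.redaction"
        and original_event is not None
        and redaction["sender"] != original_event["sender"]
    )
    print(is_admin_redaction)  # True: a moderator redacting someone else's event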
is_admin_redaction = False if event.type == EventTypes.Redaction: diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py index 1b1e991c42..d2bcf256fa 100644 --- a/tests/rest/client/test_redactions.py +++ b/tests/rest/client/test_redactions.py @@ -197,8 +197,8 @@ class RedactionsTestCase(HomeserverTestCase): message_ids.append(b["event_id"]) self.reactor.advance(10) # To get around ratelimits - # as the moderator, send a bunch of redactions redaction + # as the moderator, send a bunch of redactions for msg_id in message_ids: # These should all succeed, even though this would be denied by - # standard message ratelimiter + # the standard message ratelimiter self._redact_event(self.mod_access_token, self.room_id, msg_id) -- cgit 1.4.1 From 6604b64fae970f534d3e2a61f2fbbe51599fa26d Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Wed, 11 Sep 2019 14:00:37 +0100 Subject: Check dependencies on setup in the nicer way. (#5989) --- changelog.d/5989.misc | 1 + synapse/config/metrics.py | 12 +++++------- synapse/config/repository.py | 27 +++++++-------------------- synapse/python_dependencies.py | 8 +++++++- 4 files changed, 20 insertions(+), 28 deletions(-) create mode 100644 changelog.d/5989.misc diff --git a/changelog.d/5989.misc b/changelog.d/5989.misc new file mode 100644 index 0000000000..9f2525fd3e --- /dev/null +++ b/changelog.d/5989.misc @@ -0,0 +1 @@ +Clean up dependency checking at setup. diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 653b990e67..9eb1e55ddb 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -16,11 +16,9 @@ import attr -from ._base import Config, ConfigError +from synapse.python_dependencies import DependencyException, check_requirements -MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry - integration. - """ +from ._base import Config, ConfigError @attr.s @@ -51,9 +49,9 @@ class MetricsConfig(Config): self.sentry_enabled = "sentry" in config if self.sentry_enabled: try: - import sentry_sdk # noqa F401 - except ImportError: - raise ConfigError(MISSING_SENTRY) + check_requirements("sentry") + except DependencyException as e: + raise ConfigError(e.message) self.sentry_dsn = config["sentry"].get("dsn") if not self.sentry_dsn: diff --git a/synapse/config/repository.py b/synapse/config/repository.py index fdb1f246d0..34f1a9a92d 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -16,6 +16,7 @@ import os from collections import namedtuple +from synapse.python_dependencies import DependencyException, check_requirements from synapse.util.module_loader import load_module from ._base import Config, ConfigError @@ -34,17 +35,6 @@ THUMBNAIL_SIZE_YAML = """\ # method: %(method)s """ -MISSING_NETADDR = "Missing netaddr library. This is required for URL preview API." - -MISSING_LXML = """Missing lxml library. This is required for URL preview API. - - Install by running: - pip install lxml - - Requires libxslt1-dev system package. - """ - - ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) @@ -171,16 +161,10 @@ class ContentRepositoryConfig(Config): self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: try: - import lxml - - lxml # To stop unused lint. 
- except ImportError: - raise ConfigError(MISSING_LXML) + check_requirements("url_preview") - try: - from netaddr import IPSet - except ImportError: - raise ConfigError(MISSING_NETADDR) + except DependencyException as e: + raise ConfigError(e.message) if "url_preview_ip_range_blacklist" not in config: raise ConfigError( @@ -189,6 +173,9 @@ class ContentRepositoryConfig(Config): "to work" ) + # netaddr is a dependency for url_preview + from netaddr import IPSet + self.url_preview_ip_range_blacklist = IPSet( config["url_preview_ip_range_blacklist"] ) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index ec0ac547c1..07345e916a 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -147,7 +147,13 @@ def check_requirements(for_feature=None): ) except DistributionNotFound: deps_needed.append(dependency) - errors.append("Needed %s but it was not installed" % (dependency,)) + if for_feature: + errors.append( + "Needed %s for the '%s' feature but it was not installed" + % (dependency, for_feature) + ) + else: + errors.append("Needed %s but it was not installed" % (dependency,)) if not for_feature: # Check the optional dependencies are up to date. We allow them to not be -- cgit 1.4.1 From 9c555f37e30f339708dfd9a66687c4cd638aa957 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Sep 2019 14:23:24 +0100 Subject: Add note about extra arg to send_membership_event, remove arg in remote_reject_invite (#6009) Some small fixes to `room_member.py` found while doing other PRs. 1. Add requester to the base `_remote_reject_invite` method. 2. `send_membership_event`'s docstring was out of date and took in a `remote_room_hosts` arg that was not used and no calling function provided. --- changelog.d/6009.misc | 1 + synapse/handlers/room_member.py | 12 ++---------- 2 files changed, 3 insertions(+), 10 deletions(-) create mode 100644 changelog.d/6009.misc diff --git a/changelog.d/6009.misc b/changelog.d/6009.misc new file mode 100644 index 0000000000..fea479e1dd --- /dev/null +++ b/changelog.d/6009.misc @@ -0,0 +1 @@ +Small refactor of function arguments and docstrings in RoomMemberHandler. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 093f2ea36e..a3a3d4d143 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -100,7 +100,7 @@ class RoomMemberHandler(object): raise NotImplementedError() @abc.abstractmethod - def _remote_reject_invite(self, remote_room_hosts, room_id, target): + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): """Attempt to reject an invite for a room this server is not in. If we fail to do so we locally mark the invite as rejected. @@ -510,9 +510,7 @@ class RoomMemberHandler(object): return res @defer.inlineCallbacks - def send_membership_event( - self, requester, event, context, remote_room_hosts=None, ratelimit=True - ): + def send_membership_event(self, requester, event, context, ratelimit=True): """ Change the membership status of a user in a room. @@ -522,16 +520,10 @@ class RoomMemberHandler(object): act as the sender, will be skipped. event (SynapseEvent): The membership event. context: The context of the event. - is_guest (bool): Whether the sender is a guest. - room_hosts ([str]): Homeservers which are likely to already be in - the room, and could be danced with in order to join this - homeserver for the first time. 
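Returning to the dependency cleanup of #5989 above: config code now requests a named feature bundle and surfaces any failure as a config error. A short sketch of the calling convention (mirroring the repository.py hunk; assumes a Synapse checkout on the import path):

    from synapse.config._base import ConfigError
    from synapse.python_dependencies import DependencyException, check_requirements

    try:
        check_requirements("url_preview")
    except DependencyException as e:
        # e.message now names both the missing package and the feature, e.g.
        # "Needed lxml for the 'url_preview' feature but it was not installed"
        raise ConfigError(e.message)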
ratelimit (bool): Whether to rate limit this request. Raises: SynapseError if there was a problem changing the membership. """ - remote_room_hosts = remote_room_hosts or [] - target_user = UserID.from_string(event.state_key) room_id = event.room_id -- cgit 1.4.1 From 66ace43546e516938fa40b38bdd0d8d3ca9f9e31 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Sep 2019 14:50:40 +0100 Subject: Update sample config --- docs/sample_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index ce657921b1..c970a1c679 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -520,7 +520,7 @@ log_config: "CONFDIR/SERVERNAME.log.config" # attempts for this account. # - one for ratelimiting redactions by room admins. If this is not explicitly # set then it uses the same ratelimiting as per rc_message. This is useful -# to allow room admins to quickly deal with abuse quickly. +# to allow room admins to deal with abuse quickly. # # The defaults are as shown below. # -- cgit 1.4.1 From 7902bf1e1d6331e7964ac498988925cc26e18f79 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 11 Sep 2019 15:14:56 +0100 Subject: Clean up some code in the retry logic (#6017) * remove some unused code * make things which were constants into constants for efficiency and clarity --- changelog.d/6017.misc | 1 + synapse/storage/transactions.py | 20 -------------------- synapse/util/retryutils.py | 29 +++++++++++++---------------- 3 files changed, 14 insertions(+), 36 deletions(-) create mode 100644 changelog.d/6017.misc diff --git a/changelog.d/6017.misc b/changelog.d/6017.misc new file mode 100644 index 0000000000..5ccab9c6ca --- /dev/null +++ b/changelog.d/6017.misc @@ -0,0 +1 @@ +Clean up some code in the retry logic. diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b3c3bf55bc..d81ace0ece 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -250,26 +250,6 @@ class TransactionStore(SQLBaseStore): }, ) - def get_destinations_needing_retry(self): - """Get all destinations which are due a retry for sending a transaction. - - Returns: - list: A list of dicts - """ - - return self.runInteraction( - "get_destinations_needing_retry", self._get_destinations_needing_retry - ) - - def _get_destinations_needing_retry(self, txn): - query = ( - "SELECT * FROM destinations" - " WHERE retry_last_ts > 0 and retry_next_ts < ?" 
-        )
-
-        txn.execute(query, (self._clock.time_msec(),))
-        return self.cursor_to_dict(txn)
-
     def _start_cleanup_transactions(self):
         return run_as_background_process(
             "cleanup_transactions", self._cleanup_transactions
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 0862b5ca5a..5b16a81617 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -22,6 +22,15 @@ from synapse.api.errors import CodeMessageException

 logger = logging.getLogger(__name__)

+# the initial backoff, after the first transaction fails
+MIN_RETRY_INTERVAL = 10 * 60 * 1000
+
+# how much we multiply the backoff by after each subsequent fail
+RETRY_MULTIPLIER = 5
+
+# a cap on the backoff
+MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000
+

 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts, retry_interval, destination):
@@ -112,9 +121,6 @@ class RetryDestinationLimiter(object):
         clock,
         store,
         retry_interval,
-        min_retry_interval=10 * 60 * 1000,
-        max_retry_interval=24 * 60 * 60 * 1000,
-        multiplier_retry_interval=5,
         backoff_on_404=False,
         backoff_on_failure=True,
     ):
@@ -130,12 +136,6 @@ class RetryDestinationLimiter(object):
             retry_interval (int): The next retry interval taken from the
                 database in milliseconds, or zero if the last request was
                 successful.
-            min_retry_interval (int): The minimum retry interval to use after
-                a failed request, in milliseconds.
-            max_retry_interval (int): The maximum retry interval to use after
-                a failed request, in milliseconds.
-            multiplier_retry_interval (int): The multiplier to use to increase
-                the retry interval after a failed request.
             backoff_on_404 (bool): Back off if we get a 404
             backoff_on_failure (bool): set to False if we should not increase the
@@ -146,9 +146,6 @@ class RetryDestinationLimiter(object):
         self.destination = destination
         self.retry_interval = retry_interval
-        self.min_retry_interval = min_retry_interval
-        self.max_retry_interval = max_retry_interval
-        self.multiplier_retry_interval = multiplier_retry_interval
         self.backoff_on_404 = backoff_on_404
         self.backoff_on_failure = backoff_on_failure
@@ -196,13 +193,13 @@ class RetryDestinationLimiter(object):
         else:
             # We couldn't connect.
             if self.retry_interval:
-                self.retry_interval *= self.multiplier_retry_interval
+                self.retry_interval *= RETRY_MULTIPLIER
                 self.retry_interval *= int(random.uniform(0.8, 1.4))

-                if self.retry_interval >= self.max_retry_interval:
-                    self.retry_interval = self.max_retry_interval
+                if self.retry_interval >= MAX_RETRY_INTERVAL:
+                    self.retry_interval = MAX_RETRY_INTERVAL
             else:
-                self.retry_interval = self.min_retry_interval
+                self.retry_interval = MIN_RETRY_INTERVAL

             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",
-- cgit 1.4.1

From 9fc71dc5eed7531454a34f8fec34bd451458c7c6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 11 Sep 2019 16:02:42 +0100
Subject: Use the v2 Identity Service API for lookups (MSC2134 + MSC2140) (#5976)

This is a redo of https://github.com/matrix-org/synapse/pull/5897 but with
`id_access_token` accepted.

Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus
Identity Service v2 authentication a la
[MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140).

Identity lookup-related functions were also moved from `RoomMemberHandler` to
`IdentityHandler`.
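Stepping back to the retry constants introduced in #6017 above: they encode an exponential backoff with jitter, starting at ten minutes, multiplying by five per consecutive failure, and capping at a day. A standalone simulation of that schedule (jitter folded into one expression; the real limiter also persists the interval to the database):

    import random

    random.seed(0)  # fixed seed so the output is reproducible
    MIN_RETRY_INTERVAL = 10 * 60 * 1000
    RETRY_MULTIPLIER = 5
    MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000

    retry_interval = 0
    for failure in range(1, 6):
        if retry_interval:
            # multiply by 5, apply jitter in [0.8, 1.4), cap at a day
            retry_interval = min(
                int(retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)),
                MAX_RETRY_INTERVAL,
            )
        else:
            retry_interval = MIN_RETRY_INTERVAL
        print("failure %d -> retry in %d ms" % (failure, retry_interval))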
--- changelog.d/5897.feature | 1 + synapse/handlers/identity.py | 56 ++++++++----- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 178 +++++++++++++++++++++++++++++++++++++--- synapse/rest/client/v1/room.py | 1 + synapse/util/hash.py | 33 ++++++++ 6 files changed, 238 insertions(+), 35 deletions(-) create mode 100644 changelog.d/5897.feature create mode 100644 synapse/util/hash.py diff --git a/changelog.d/5897.feature b/changelog.d/5897.feature new file mode 100644 index 0000000000..1557e559e8 --- /dev/null +++ b/changelog.d/5897.feature @@ -0,0 +1 @@ +Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus id_access_token authentication for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)). diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index f690fd04a3..512f38e5a6 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -74,25 +74,6 @@ class IdentityHandler(BaseHandler): id_access_token = creds.get("id_access_token") return client_secret, id_server, id_access_token - def create_id_access_token_header(self, id_access_token): - """Create an Authorization header for passing to SimpleHttpClient as the header value - of an HTTP request. - - Args: - id_access_token (str): An identity server access token. - - Returns: - list[str]: The ascii-encoded bearer token encased in a list. - """ - # Prefix with Bearer - bearer_token = "Bearer %s" % id_access_token - - # Encode headers to standard ascii - bearer_token.encode("ascii") - - # Return as a list as that's how SimpleHttpClient takes header values - return [bearer_token] - @defer.inlineCallbacks def threepid_from_creds(self, id_server, creds): """ @@ -178,9 +159,7 @@ class IdentityHandler(BaseHandler): bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - headers["Authorization"] = self.create_id_access_token_header( - id_access_token - ) + headers["Authorization"] = create_id_access_token_header(id_access_token) else: bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) @@ -478,3 +457,36 @@ class IdentityHandler(BaseHandler): except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + + +def create_id_access_token_header(id_access_token): + """Create an Authorization header for passing to SimpleHttpClient as the header value + of an HTTP request. + + Args: + id_access_token (str): An identity server access token. + + Returns: + list[str]: The ascii-encoded bearer token encased in a list. + """ + # Prefix with Bearer + bearer_token = "Bearer %s" % id_access_token + + # Encode headers to standard ascii + bearer_token.encode("ascii") + + # Return as a list as that's how SimpleHttpClient takes header values + return [bearer_token] + + +class LookupAlgorithm: + """ + Supported hashing algorithms when performing a 3PID lookup. + + SHA256 - Hashing an (address, medium, pepper) combo with sha256, then url-safe base64 + encoding + NONE - Not performing any hashing. 
Simply sending an (address, medium) combo in plaintext + """ + + SHA256 = "sha256" + NONE = "none" diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a509e11d69..970be3c846 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -579,8 +579,8 @@ class RoomCreationHandler(BaseHandler): room_id = yield self._generate_room_id(creator_id=user_id, is_public=is_public) + directory_handler = self.hs.get_handlers().directory_handler if room_alias: - directory_handler = self.hs.get_handlers().directory_handler yield directory_handler.create_association( requester=requester, room_id=room_id, @@ -665,6 +665,7 @@ class RoomCreationHandler(BaseHandler): for invite_3pid in invite_3pid_list: id_server = invite_3pid["id_server"] + id_access_token = invite_3pid.get("id_access_token") # optional address = invite_3pid["address"] medium = invite_3pid["medium"] yield self.hs.get_room_member_handler().do_3pid_invite( @@ -675,6 +676,7 @@ class RoomCreationHandler(BaseHandler): id_server, requester, txn_id=None, + id_access_token=id_access_token, ) result = {"room_id": room_id} diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a3a3d4d143..43d10a5308 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -29,9 +29,11 @@ from twisted.internet import defer from synapse import types from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError +from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room +from synapse.util.hash import sha256_and_url_safe_base64 from ._base import BaseHandler @@ -626,7 +628,7 @@ class RoomMemberHandler(object): servers.remove(room_alias.domain) servers.insert(0, room_alias.domain) - return (RoomID.from_string(room_id), servers) + return RoomID.from_string(room_id), servers @defer.inlineCallbacks def _get_inviter(self, user_id, room_id): @@ -638,7 +640,15 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def do_3pid_invite( - self, room_id, inviter, medium, address, id_server, requester, txn_id + self, + room_id, + inviter, + medium, + address, + id_server, + requester, + txn_id, + id_access_token=None, ): if self.config.block_non_admin_invites: is_requester_admin = yield self.auth.is_server_admin(requester.user) @@ -661,7 +671,12 @@ class RoomMemberHandler(object): Codes.FORBIDDEN, ) - invitee = yield self._lookup_3pid(id_server, medium, address) + if not self._enable_lookup: + raise SynapseError( + 403, "Looking up third-party identifiers is denied from this server" + ) + + invitee = yield self._lookup_3pid(id_server, medium, address, id_access_token) if invitee: yield self.update_membership( @@ -673,9 +688,47 @@ class RoomMemberHandler(object): ) @defer.inlineCallbacks - def _lookup_3pid(self, id_server, medium, address): + def _lookup_3pid(self, id_server, medium, address, id_access_token=None): """Looks up a 3pid in the passed identity server. + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). 
+ id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + str|None: the matrix ID of the 3pid, or None if it is not recognized. + """ + if id_access_token is not None: + try: + results = yield self._lookup_3pid_v2( + id_server, id_access_token, medium, address + ) + return results + + except Exception as e: + # Catch HttpResponseExcept for a non-200 response code + # Check if this identity server does not know about v2 lookups + if isinstance(e, HttpResponseException) and e.code == 404: + # This is an old identity server that does not yet support v2 lookups + logger.warning( + "Attempted v2 lookup on v1 identity server %s. Falling " + "back to v1", + id_server, + ) + else: + logger.warning("Error when looking up hashing details: %s", e) + return None + + return (yield self._lookup_3pid_v1(id_server, medium, address)) + + @defer.inlineCallbacks + def _lookup_3pid_v1(self, id_server, medium, address): + """Looks up a 3pid in the passed identity server using v1 lookup. + Args: id_server (str): The server name (including port, if required) of the identity server to use. @@ -685,10 +738,6 @@ class RoomMemberHandler(object): Returns: str: the matrix ID of the 3pid, or None if it is not recognized. """ - if not self._enable_lookup: - raise SynapseError( - 403, "Looking up third-party identifiers is denied from this server" - ) try: data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), @@ -702,9 +751,116 @@ class RoomMemberHandler(object): return data["mxid"] except IOError as e: - logger.warn("Error from identity server lookup: %s" % (e,)) + logger.warning("Error from v1 identity server lookup: %s" % (e,)) + + return None + + @defer.inlineCallbacks + def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): + """Looks up a 3pid in the passed identity server using v2 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + id_access_token (str): The access token to authenticate to the identity server with + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. 
+ """ + # Check what hashing details are supported by this identity server + hash_details = yield self.simple_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), + {"access_token": id_access_token}, + ) + + if not isinstance(hash_details, dict): + logger.warning( + "Got non-dict object when checking hash details of %s%s: %s", + id_server_scheme, + id_server, + hash_details, + ) + raise SynapseError( + 400, + "Non-dict object from %s%s during v2 hash_details request: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Extract information from hash_details + supported_lookup_algorithms = hash_details.get("algorithms") + lookup_pepper = hash_details.get("lookup_pepper") + if ( + not supported_lookup_algorithms + or not isinstance(supported_lookup_algorithms, list) + or not lookup_pepper + or not isinstance(lookup_pepper, str) + ): + raise SynapseError( + 400, + "Invalid hash details received from identity server %s%s: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Check if any of the supported lookup algorithms are present + if LookupAlgorithm.SHA256 in supported_lookup_algorithms: + # Perform a hashed lookup + lookup_algorithm = LookupAlgorithm.SHA256 + + # Hash address, medium and the pepper with sha256 + to_hash = "%s %s %s" % (address, medium, lookup_pepper) + lookup_value = sha256_and_url_safe_base64(to_hash) + + elif LookupAlgorithm.NONE in supported_lookup_algorithms: + # Perform a non-hashed lookup + lookup_algorithm = LookupAlgorithm.NONE + + # Combine together plaintext address and medium + lookup_value = "%s %s" % (address, medium) + + else: + logger.warning( + "None of the provided lookup algorithms of %s are supported: %s", + id_server, + supported_lookup_algorithms, + ) + raise SynapseError( + 400, + "Provided identity server does not support any v2 lookup " + "algorithms that this homeserver supports.", + ) + + # Authenticate with identity server given the access token from the client + headers = {"Authorization": create_id_access_token_header(id_access_token)} + + try: + lookup_results = yield self.simple_http_client.post_json_get_json( + "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), + { + "addresses": [lookup_value], + "algorithm": lookup_algorithm, + "pepper": lookup_pepper, + }, + headers=headers, + ) + except Exception as e: + logger.warning("Error when performing a v2 3pid lookup: %s", e) + raise SynapseError( + 500, "Unknown error occurred during identity server lookup" + ) + + # Check for a mapping from what we looked up to an MXID + if "mappings" not in lookup_results or not isinstance( + lookup_results["mappings"], dict + ): + logger.warning("No results from 3pid lookup") return None + # Return the MXID if it's available, or None otherwise + mxid = lookup_results["mappings"].get(lookup_value) + return mxid + @defer.inlineCallbacks def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: @@ -844,7 +1000,6 @@ class RoomMemberHandler(object): display_name (str): A user-friendly name to represent the invited user. 
""" - is_url = "%s%s/_matrix/identity/api/v1/store-invite" % ( id_server_scheme, id_server, @@ -862,7 +1017,6 @@ class RoomMemberHandler(object): "sender_display_name": inviter_display_name, "sender_avatar_url": inviter_avatar_url, } - try: data = yield self.simple_http_client.post_json_get_json( is_url, invite_config @@ -1049,7 +1203,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): # The 'except' clause is very broad, but we need to # capture everything from DNS failures upwards # - logger.warn("Failed to reject invite: %s", e) + logger.warning("Failed to reject invite: %s", e) yield self.store.locally_reject_invite(target.to_string(), room_id) return {} diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 3582259026..a6a7b3b57e 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -701,6 +701,7 @@ class RoomMembershipRestServlet(TransactionRestServlet): content["id_server"], requester, txn_id, + content.get("id_access_token"), ) return 200, {} diff --git a/synapse/util/hash.py b/synapse/util/hash.py new file mode 100644 index 0000000000..359168704e --- /dev/null +++ b/synapse/util/hash.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib + +import unpaddedbase64 + + +def sha256_and_url_safe_base64(input_text): + """SHA256 hash an input string, encode the digest as url-safe base64, and + return + + :param input_text: string to hash + :type input_text: str + + :returns a sha256 hashed and url-safe base64 encoded digest + :rtype: str + """ + digest = hashlib.sha256(input_text.encode()).digest() + return unpaddedbase64.encode_base64(digest, urlsafe=True) -- cgit 1.4.1 From 6d847d8ce69f2cb849633265aaeb4a9df4ff713d Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Wed, 11 Sep 2019 20:22:18 +0300 Subject: Ensure support users can be registered even if MAU limit is reached This allows support users to be created even on MAU limits via the admin API. Support users are excluded from MAU after creation, so it makes sense to exclude them in creation - except if the whole host is in disabled state. Signed-off-by: Jason Robinson --- changelog.d/6020.bugfix | 1 + synapse/api/auth.py | 11 +++++++++-- tests/api/test_auth.py | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6020.bugfix diff --git a/changelog.d/6020.bugfix b/changelog.d/6020.bugfix new file mode 100644 index 0000000000..58a7deba9d --- /dev/null +++ b/changelog.d/6020.bugfix @@ -0,0 +1 @@ +Ensure support users can be registered even if MAU limit is reached. 
diff --git a/synapse/api/auth.py b/synapse/api/auth.py index ddc195bc32..9e445cd808 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -25,7 +25,7 @@ from twisted.internet import defer import synapse.logging.opentracing as opentracing import synapse.types from synapse import event_auth -from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.api.constants import EventTypes, JoinRules, Membership, UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -709,7 +709,7 @@ class Auth(object): ) @defer.inlineCallbacks - def check_auth_blocking(self, user_id=None, threepid=None): + def check_auth_blocking(self, user_id=None, threepid=None, user_type=None): """Checks if the user should be rejected for some external reason, such as monthly active user limiting or global disable flag @@ -722,6 +722,9 @@ class Auth(object): with a MAU blocked server, normally they would be rejected but their threepid is on the reserved list. user_id and threepid should never be set at the same time. + + user_type(str|None): If present, is used to decide whether to check against + certain blocking reasons like MAU. """ # Never fail an auth check for the server notices users or support user @@ -759,6 +762,10 @@ class Auth(object): self.hs.config.mau_limits_reserved_threepids, threepid ): return + elif user_type == UserTypes.SUPPORT: + # If the user does not exist yet and is of type "support", + # allow registration. Support users are excluded from MAU checks. + return # Else if there is no room in the MAU bucket, bail current_mau = yield self.store.get_monthly_active_count() if current_mau >= self.hs.config.max_mau_value: diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index c0cb8ef296..6121efcfa9 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -21,6 +21,7 @@ from twisted.internet import defer import synapse.handlers.auth from synapse.api.auth import Auth +from synapse.api.constants import UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -335,6 +336,23 @@ class AuthTestCase(unittest.TestCase): ) yield self.auth.check_auth_blocking() + @defer.inlineCallbacks + def test_blocking_mau__depending_on_user_type(self): + self.hs.config.max_mau_value = 50 + self.hs.config.limit_usage_by_mau = True + + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Support users allowed + yield self.auth.check_auth_blocking(user_type=UserTypes.SUPPORT) + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Bots not allowed + with self.assertRaises(ResourceLimitError): + yield self.auth.check_auth_blocking(user_type=UserTypes.BOT) + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Real users not allowed + with self.assertRaises(ResourceLimitError): + yield self.auth.check_auth_blocking() + @defer.inlineCallbacks def test_reserved_threepid(self): self.hs.config.limit_usage_by_mau = True -- cgit 1.4.1 From a8251da10f98a251b9aa0be1f313d8d2e4ac1c3f Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 12 Sep 2019 10:57:37 +0100 Subject: Blow up config if opentracing is missing (#5985) * Blow up config if opentracing is missing --- changelog.d/5985.feature | 1 + synapse/config/tracer.py | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 changelog.d/5985.feature diff --git a/changelog.d/5985.feature b/changelog.d/5985.feature new file mode 100644 index 0000000000..e5e29504af --- /dev/null +++ b/changelog.d/5985.feature @@ -0,0 +1 @@ +Check at setup that 
opentracing is installed if it's enabled in the config. diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 95e7ccb3a3..85d99a3166 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.python_dependencies import DependencyException, check_requirements + from ._base import Config, ConfigError @@ -32,6 +34,11 @@ class TracerConfig(Config): if not self.opentracer_enabled: return + try: + check_requirements("opentracing") + except DependencyException as e: + raise ConfigError(e.message) + # The tracer is enabled so sanitize the config self.opentracer_whitelist = opentracing_config.get("homeserver_whitelist", []) -- cgit 1.4.1 From dd2e5b0038dbe9812775e5943e5bccf550d7468a Mon Sep 17 00:00:00 2001 From: Sorunome Date: Thu, 12 Sep 2019 12:24:57 +0200 Subject: add report_stats_endpoint config option (#6012) This PR adds the optional `report_stats_endpoint` to configure where stats are reported to, if enabled. --- changelog.d/6012.feature | 1 + docs/sample_config.yaml | 5 +++++ synapse/app/homeserver.py | 6 ++++-- synapse/config/metrics.py | 9 +++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6012.feature diff --git a/changelog.d/6012.feature b/changelog.d/6012.feature new file mode 100644 index 0000000000..25425510c6 --- /dev/null +++ b/changelog.d/6012.feature @@ -0,0 +1 @@ +Add report_stats_endpoint option to configure where stats are reported to, if enabled. Contributed by @Sorunome. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index c970a1c679..dd4e2d5ebd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -985,6 +985,11 @@ metrics_flags: # Whether or not to report anonymized homeserver usage statistics. # report_stats: true|false +# The endpoint to report the anonymized homeserver usage statistics to. 
+# Defaults to https://matrix.org/report-usage-stats/push +# +#report_stats_endpoint: https://example.com/report-usage-stats/push + ## API Configuration ## diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 04f1ed14f3..774326dff9 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -561,10 +561,12 @@ def run(hs): stats["database_engine"] = hs.get_datastore().database_engine_name stats["database_server_version"] = hs.get_datastore().get_server_version() - logger.info("Reporting stats to matrix.org: %s" % (stats,)) + logger.info( + "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats) + ) try: yield hs.get_simple_http_client().put_json( - "https://matrix.org/report-usage-stats/push", stats + hs.config.report_stats_endpoint, stats ) except Exception as e: logger.warn("Error reporting stats: %s", e) diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 9eb1e55ddb..ec35a6b868 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -37,6 +37,9 @@ class MetricsConfig(Config): def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) + self.report_stats_endpoint = config.get( + "report_stats_endpoint", "https://matrix.org/report-usage-stats/push" + ) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") @@ -95,4 +98,10 @@ class MetricsConfig(Config): else: res += "report_stats: %s\n" % ("true" if report_stats else "false") + res += """ + # The endpoint to report the anonymized homeserver usage statistics to. + # Defaults to https://matrix.org/report-usage-stats/push + # + #report_stats_endpoint: https://example.com/report-usage-stats/push + """ return res -- cgit 1.4.1 From 642fad8bd47ffcb74d970de632a7316dfc15d26b Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 12 Sep 2019 11:42:47 +0100 Subject: Fix SSO fallback login Well, it worked, but forgot to remove the thing saying login was unavailable. --- synapse/static/client/login/js/login.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/static/client/login/js/login.js b/synapse/static/client/login/js/login.js index e02663f50e..276c271bbe 100644 --- a/synapse/static/client/login/js/login.js +++ b/synapse/static/client/login/js/login.js @@ -62,7 +62,7 @@ var show_login = function() { $("#sso_flow").show(); } - if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas) { + if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas && !matrixLogin.serverAcceptsSso) { $("#no_login_types").show(); } }; -- cgit 1.4.1 From 6db22e4702fcaa7b4f6b814e215071d5aca6893e Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 12 Sep 2019 11:46:37 +0100 Subject: changelog --- changelog.d/6024.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6024.bugfix diff --git a/changelog.d/6024.bugfix b/changelog.d/6024.bugfix new file mode 100644 index 0000000000..ddad34595b --- /dev/null +++ b/changelog.d/6024.bugfix @@ -0,0 +1 @@ +Fix bug where login error was shown incorrectly on SSO fallback login. 
-- cgit 1.4.1


From 0388beafe48d1ae9c30565c37b8902b9aa0b8fe2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 12 Sep 2019 12:59:43 +0100
Subject: Fix bug in calculating the federation retry backoff period (#6025)

This was intended to introduce an element of jitter; instead it gave you a
one-in-three chance of resetting to zero.
---
 changelog.d/6025.bugfix    | 1 +
 synapse/util/retryutils.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6025.bugfix

diff --git a/changelog.d/6025.bugfix b/changelog.d/6025.bugfix
new file mode 100644
index 0000000000..50d7f9aab5
--- /dev/null
+++ b/changelog.d/6025.bugfix
@@ -0,0 +1 @@
+Fix bug in calculating the federation retry backoff period.
\ No newline at end of file
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 5b16a81617..33263fe20f 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -193,8 +193,9 @@ class RetryDestinationLimiter(object):
         else:
             # We couldn't connect.
             if self.retry_interval:
-                self.retry_interval *= RETRY_MULTIPLIER
-                self.retry_interval *= int(random.uniform(0.8, 1.4))
+                self.retry_interval = int(
+                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                )

             if self.retry_interval >= MAX_RETRY_INTERVAL:
                 self.retry_interval = MAX_RETRY_INTERVAL
-- cgit 1.4.1


From 3d882a7ba52114f18ec6be61c51561db203a0534 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 12 Sep 2019 13:00:13 +0100
Subject: Remove the cap on federation retry interval. (#6026)

Essentially the intention here is to end up blacklisting servers which never
respond to federation requests.

Fixes https://github.com/matrix-org/synapse/issues/5113.
---
 changelog.d/6026.feature   | 1 +
 synapse/util/retryutils.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6026.feature

diff --git a/changelog.d/6026.feature b/changelog.d/6026.feature
new file mode 100644
index 0000000000..2489ff09b5
--- /dev/null
+++ b/changelog.d/6026.feature
@@ -0,0 +1 @@
+Stop sending federation transactions to servers which have been down for a long time.
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 33263fe20f..b740913b58 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -28,8 +28,8 @@ MIN_RETRY_INTERVAL = 10 * 60 * 1000
 # how much we multiply the backoff by after each subsequent fail
 RETRY_MULTIPLIER = 5

-# a cap on the backoff
-MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000
+# a cap on the backoff. 
(Essentially none) +MAX_RETRY_INTERVAL = 2 ** 63 class NotRetryingDestination(Exception): -- cgit 1.4.1 From b617864cd9f81109e818bc5ae95bee317d917b72 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 13 Sep 2019 02:29:55 +1000 Subject: Fix for structured logging tests stomping on logs (#6023) --- MANIFEST.in | 12 +++++---- changelog.d/6023.misc | 1 + mypy.ini | 54 ++++++++++++++++++++++++++++++++++++++++ synapse/config/logger.py | 33 ++++++++++++++++++------ synapse/logging/_structured.py | 8 +++--- synapse/logging/_terse_json.py | 8 +++--- synapse/logging/opentracing.py | 4 +-- synapse/metrics/__init__.py | 5 ++-- synapse/metrics/_exposition.py | 4 ++- synapse/python_dependencies.py | 7 +++--- tests/logging/test_structured.py | 25 ++++++++++++++++--- tests/logging/test_terse_json.py | 4 +-- tox.ini | 30 +++++++++++++++++----- 13 files changed, 154 insertions(+), 41 deletions(-) create mode 100644 changelog.d/6023.misc create mode 100644 mypy.ini diff --git a/MANIFEST.in b/MANIFEST.in index 919cd8a1cd..9c2902b8d2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -38,14 +38,16 @@ exclude sytest-blacklist include pyproject.toml recursive-include changelog.d * -prune .github -prune demo/etc -prune docker +prune .buildkite prune .circleci +prune .codecov.yml prune .coveragerc +prune .github prune debian -prune .codecov.yml -prune .buildkite +prune demo/etc +prune docker +prune mypy.ini +prune stubs exclude jenkins* recursive-exclude jenkins *.sh diff --git a/changelog.d/6023.misc b/changelog.d/6023.misc new file mode 100644 index 0000000000..d80410c22c --- /dev/null +++ b/changelog.d/6023.misc @@ -0,0 +1 @@ +Fix the structured logging tests stomping on the global log configuration for subsequent tests. diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000000..8788574ee3 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,54 @@ +[mypy] +namespace_packages=True +plugins=mypy_zope:plugin +follow_imports=skip +mypy_path=stubs + +[mypy-synapse.config.homeserver] +# this is a mess because of the metaclass shenanigans +ignore_errors = True + +[mypy-zope] +ignore_missing_imports = True + +[mypy-constantly] +ignore_missing_imports = True + +[mypy-twisted.*] +ignore_missing_imports = True + +[mypy-treq.*] +ignore_missing_imports = True + +[mypy-hyperlink] +ignore_missing_imports = True + +[mypy-h11] +ignore_missing_imports = True + +[mypy-opentracing] +ignore_missing_imports = True + +[mypy-OpenSSL] +ignore_missing_imports = True + +[mypy-netaddr] +ignore_missing_imports = True + +[mypy-saml2.*] +ignore_missing_imports = True + +[mypy-unpaddedbase64] +ignore_missing_imports = True + +[mypy-canonicaljson] +ignore_missing_imports = True + +[mypy-jaeger_client] +ignore_missing_imports = True + +[mypy-jsonschema] +ignore_missing_imports = True + +[mypy-signedjson.*] +ignore_missing_imports = True diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 2704c18720..767ecfdf09 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -21,7 +21,12 @@ from string import Template import yaml -from twisted.logger import STDLibLogObserver, globalLogBeginner +from twisted.logger import ( + ILogObserver, + LogBeginner, + STDLibLogObserver, + globalLogBeginner, +) import synapse from synapse.app import _base as appbase @@ -124,7 +129,7 @@ class LoggingConfig(Config): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def _setup_stdlib_logging(config, log_config): +def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner): """ Set up Python stdlib 
logging. """ @@ -165,12 +170,12 @@ def _setup_stdlib_logging(config, log_config): return observer(event) - globalLogBeginner.beginLoggingTo( - [_log], redirectStandardIO=not config.no_redirect_stdio - ) + logBeginner.beginLoggingTo([_log], redirectStandardIO=not config.no_redirect_stdio) if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") + return observer + def _reload_stdlib_logging(*args, log_config=None): logger = logging.getLogger("") @@ -181,7 +186,9 @@ def _reload_stdlib_logging(*args, log_config=None): logging.config.dictConfig(log_config) -def setup_logging(hs, config, use_worker_options=False): +def setup_logging( + hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner +) -> ILogObserver: """ Set up the logging subsystem. @@ -191,6 +198,12 @@ def setup_logging(hs, config, use_worker_options=False): use_worker_options (bool): True to use the 'worker_log_config' option instead of 'log_config'. + + logBeginner: The Twisted logBeginner to use. + + Returns: + The "root" Twisted Logger observer, suitable for sending logs to from a + Logger instance. """ log_config = config.worker_log_config if use_worker_options else config.log_config @@ -210,10 +223,12 @@ def setup_logging(hs, config, use_worker_options=False): log_config_body = read_config() if log_config_body and log_config_body.get("structured") is True: - setup_structured_logging(hs, config, log_config_body) + logger = setup_structured_logging( + hs, config, log_config_body, logBeginner=logBeginner + ) appbase.register_sighup(read_config, callback=reload_structured_logging) else: - _setup_stdlib_logging(config, log_config_body) + logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) appbase.register_sighup(read_config, callback=_reload_stdlib_logging) # make sure that the first thing we log is a thing we can grep backwards @@ -221,3 +236,5 @@ def setup_logging(hs, config, use_worker_options=False): logging.warn("***** STARTING SERVER *****") logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) logging.info("Server hostname: %s", config.server_name) + + return logger diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 0367d6dfc4..3220e985a9 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -18,6 +18,7 @@ import os.path import sys import typing import warnings +from typing import List import attr from constantly import NamedConstant, Names, ValueConstant, Values @@ -33,7 +34,6 @@ from twisted.logger import ( LogLevelFilterPredicate, LogPublisher, eventAsText, - globalLogBeginner, jsonFileLogObserver, ) @@ -134,7 +134,7 @@ class PythonStdlibToTwistedLogger(logging.Handler): ) -def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver: +def SynapseFileLogObserver(outFile: typing.IO[str]) -> FileLogObserver: """ A log observer that formats events like the traditional log formatter and sends them to `outFile`. 
@@ -265,7 +265,7 @@ def setup_structured_logging( hs, config, log_config: dict, - logBeginner: LogBeginner = globalLogBeginner, + logBeginner: LogBeginner, redirect_stdlib_logging: bool = True, ) -> LogPublisher: """ @@ -286,7 +286,7 @@ def setup_structured_logging( if "drains" not in log_config: raise ConfigError("The logging configuration requires a list of drains.") - observers = [] + observers = [] # type: List[ILogObserver] for observer in parse_drain_configs(log_config["drains"]): # Pipe drains diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 7f1e8f23fe..0ebbde06f2 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -21,10 +21,11 @@ import sys from collections import deque from ipaddress import IPv4Address, IPv6Address, ip_address from math import floor -from typing.io import TextIO +from typing import IO import attr from simplejson import dumps +from zope.interface import implementer from twisted.application.internet import ClientService from twisted.internet.endpoints import ( @@ -33,7 +34,7 @@ from twisted.internet.endpoints import ( TCP6ClientEndpoint, ) from twisted.internet.protocol import Factory, Protocol -from twisted.logger import FileLogObserver, Logger +from twisted.logger import FileLogObserver, ILogObserver, Logger from twisted.python.failure import Failure @@ -129,7 +130,7 @@ def flatten_event(event: dict, metadata: dict, include_time: bool = False): return new_event -def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: +def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogObserver: """ A log observer that formats events to a flattened JSON representation. @@ -146,6 +147,7 @@ def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObs @attr.s +@implementer(ILogObserver) class TerseJSONToTCPLogObserver(object): """ An IObserver that writes JSON logs to a TCP target. 
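The `@implementer(ILogObserver)` decoration added to `TerseJSONToTCPLogObserver` just above is the standard zope.interface pattern that the new mypy-zope plugin in this commit relies on. A minimal, Synapse-independent sketch of why the declaration matters:

```python
from zope.interface import implementer
from zope.interface.verify import verifyObject

from twisted.logger import ILogObserver


@implementer(ILogObserver)
class ListObserver:
    """A trivial observer that collects structured log events in a list."""

    def __init__(self):
        self.events = []

    def __call__(self, event: dict) -> None:
        self.events.append(event)


observer = ListObserver()
# Passes because the class both declares and satisfies ILogObserver;
# without the declaration, interface checks (and mypy-zope) reject it.
verifyObject(ILogObserver, observer)
observer({"log_namespace": "example", "message": "hello"})
```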
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 7246253018..308a27213b 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -223,8 +223,8 @@ try: from jaeger_client import Config as JaegerConfig from synapse.logging.scopecontextmanager import LogContextScopeManager except ImportError: - JaegerConfig = None - LogContextScopeManager = None + JaegerConfig = None # type: ignore + LogContextScopeManager = None # type: ignore logger = logging.getLogger(__name__) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index b5c9595cb9..bec3b13397 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -20,6 +20,7 @@ import os import platform import threading import time +from typing import Dict, Union import six @@ -42,9 +43,7 @@ logger = logging.getLogger(__name__) METRICS_PREFIX = "/_synapse/metrics" running_on_pypy = platform.python_implementation() == "PyPy" -all_metrics = [] -all_collectors = [] -all_gauges = {} +all_gauges = {} # type: Dict[str, Union[LaterGauge, InFlightGauge, BucketCollector]] HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index 1933ecd3e3..74d9c3ecd3 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -36,7 +36,9 @@ from twisted.web.resource import Resource try: from prometheus_client.samples import Sample except ImportError: - Sample = namedtuple("Sample", ["name", "labels", "value", "timestamp", "exemplar"]) + Sample = namedtuple( + "Sample", ["name", "labels", "value", "timestamp", "exemplar"] + ) # type: ignore CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8") diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 07345e916a..0bd563edc7 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +from typing import Set from pkg_resources import ( DistributionNotFound, @@ -97,7 +98,7 @@ CONDITIONAL_REQUIREMENTS = { "jwt": ["pyjwt>=1.6.4"], } -ALL_OPTIONAL_REQUIREMENTS = set() +ALL_OPTIONAL_REQUIREMENTS = set() # type: Set[str] for name, optional_deps in CONDITIONAL_REQUIREMENTS.items(): # Exclude systemd as it's a system-based requirement. @@ -174,8 +175,8 @@ def check_requirements(for_feature=None): pass if deps_needed: - for e in errors: - logging.error(e) + for err in errors: + logging.error(err) raise DependencyException(deps_needed) diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index a786de0233..451d05c0f0 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import os.path import shutil @@ -33,7 +34,20 @@ class FakeBeginner(object): self.observers = observers -class StructuredLoggingTestCase(HomeserverTestCase): +class StructuredLoggingTestBase(object): + """ + Test base that registers a cleanup handler to reset the stdlib log handler + to 'unset'. + """ + + def prepare(self, reactor, clock, hs): + def _cleanup(): + logging.getLogger("synapse").setLevel(logging.NOTSET) + + self.addCleanup(_cleanup) + + +class StructuredLoggingTestCase(StructuredLoggingTestBase, HomeserverTestCase): """ Tests for Synapse's structured logging support. 
""" @@ -139,7 +153,9 @@ class StructuredLoggingTestCase(HomeserverTestCase): self.assertEqual(logs[0]["request"], "somereq") -class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): +class StructuredLoggingConfigurationFileTestCase( + StructuredLoggingTestBase, HomeserverTestCase +): def make_homeserver(self, reactor, clock): tempdir = self.mktemp() @@ -179,10 +195,11 @@ class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): """ When a structured logging config is given, Synapse will use it. """ - setup_logging(self.hs, self.hs.config) + beginner = FakeBeginner() + publisher = setup_logging(self.hs, self.hs.config, logBeginner=beginner) # Make a logger and send an event - logger = Logger(namespace="tests.logging.test_structured") + logger = Logger(namespace="tests.logging.test_structured", observer=publisher) with LoggingContext("testcontext", request="somereq"): logger.info("Hello there, {name}!", name="steve") diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 514282591d..4cf81f7128 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -23,10 +23,10 @@ from synapse.logging._structured import setup_structured_logging from tests.server import connect_client from tests.unittest import HomeserverTestCase -from .test_structured import FakeBeginner +from .test_structured import FakeBeginner, StructuredLoggingTestBase -class TerseJSONTCPTestCase(HomeserverTestCase): +class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase): def test_log_output(self): """ The Terse JSON outputter delivers simplified structured logs over TCP. diff --git a/tox.ini b/tox.ini index 7cb40847b5..1bce10a4ce 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,7 @@ envlist = packaging, py35, py36, py37, check_codestyle, check_isort [base] +basepython = python3.7 deps = mock python-subunit @@ -137,18 +138,35 @@ commands = {toxinidir}/scripts-dev/generate_sample_config --check skip_install = True deps = coverage -whitelist_externals = - bash commands= coverage combine coverage report +[testenv:cov-erase] +skip_install = True +deps = + coverage +commands= + coverage erase + +[testenv:cov-html] +skip_install = True +deps = + coverage +commands= + coverage html + [testenv:mypy] -basepython = python3.5 +basepython = python3.7 +skip_install = True deps = {[base]deps} mypy + mypy-zope + typeshed +env = + MYPYPATH = stubs/ extras = all -commands = mypy --ignore-missing-imports \ - synapse/logging/_structured.py \ - synapse/logging/_terse_json.py +commands = mypy --show-traceback \ + synapse/logging/ \ + synapse/config/ -- cgit 1.4.1 From 9eaa5d6d2427a6c3edcdf18c0868c697c17fd6d4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 12 Sep 2019 21:13:31 +0100 Subject: README: link to reverse_proxy.rst (#6027) --- README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.rst b/README.rst index bbff8de5ab..fbbf958d6c 100644 --- a/README.rst +++ b/README.rst @@ -381,3 +381,16 @@ indicate that your server is also issuing far more outgoing federation requests than can be accounted for by your users' activity, this is a likely cause. The misbehavior can be worked around by setting ``use_presence: false`` in the Synapse config file. 
+
+People can't accept room invitations from me
+--------------------------------------------
+
+The typical failure mode here is that you send an invitation to someone
+to join a room or direct chat, but when they go to accept it, they get an
+error (typically along the lines of "Invalid signature"). They might see
+something like the following in their logs::
+
+    2019-09-11 19:32:04,271 - synapse.federation.transport.server - 288 - WARNING - GET-11752 - authenticate_request failed: 401: Invalid signature for server <server> with key ed25519:a_EqML: Unable to verify signature for <server>
+
+This is normally caused by a misconfiguration in your reverse-proxy. See
+`docs/reverse_proxy.rst <docs/reverse_proxy.rst>`_ and double-check that your settings are correct.
-- cgit 1.4.1


From 1c7df13e7b26f249726380cbec5a6bc7bb3daeb6 Mon Sep 17 00:00:00 2001
From: axel simon
Date: Fri, 13 Sep 2019 09:50:17 +0200
Subject: add explanations on how to actually include an access_token (#6031)

---
 docs/admin_api/README.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/admin_api/README.rst b/docs/admin_api/README.rst
index d4f564cfae..191806c5b4 100644
--- a/docs/admin_api/README.rst
+++ b/docs/admin_api/README.rst
@@ -10,3 +10,15 @@ server admin by updating the database directly, e.g.:
 ``UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'``

 Restarting may be required for the changes to register.
+
+Using an admin access_token
+###########################
+
+Many of the API calls listed in the documentation here will require you to include an admin `access_token`.
+Finding your user's `access_token` is client-dependent, but will usually be shown in the client's settings.
+
+Once you have your `access_token`, to include it in a request, the best option is to add the token to a request header:
+
+``curl --header "Authorization: Bearer <access_token>" <request_url>``
+
+For more details, please refer to the complete `matrix spec documentation <https://matrix.org/docs/spec/>`_.
-- cgit 1.4.1


From c755955f335984dc6f97a269b57ad955f257ef8f Mon Sep 17 00:00:00 2001
From: Travis Ralston
Date: Fri, 13 Sep 2019 01:58:18 -0600
Subject: Add developer docs for using SAML without a server (#6032)

---
 changelog.d/6032.misc | 1 +
 docs/dev/saml.md      | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 changelog.d/6032.misc
 create mode 100644 docs/dev/saml.md

diff --git a/changelog.d/6032.misc b/changelog.d/6032.misc
new file mode 100644
index 0000000000..ec5b5eb881
--- /dev/null
+++ b/changelog.d/6032.misc
@@ -0,0 +1 @@
+Add developer documentation for using SAML2.
diff --git a/docs/dev/saml.md b/docs/dev/saml.md
new file mode 100644
index 0000000000..f41aadce47
--- /dev/null
+++ b/docs/dev/saml.md
@@ -0,0 +1,37 @@
+# How to test SAML as a developer without a server
+
+https://capriza.github.io/samling/samling.html (https://github.com/capriza/samling) is a great
+resource for being able to tinker with the SAML options within Synapse without needing to
+deploy and configure a complicated software stack.
+
+To make Synapse (and therefore Riot) use it:
+
+1. Use the samling.html URL above or deploy your own and visit the IdP Metadata tab.
+2. Copy the XML to your clipboard.
+3. On your Synapse server, create a new file `samling.xml` next to your `homeserver.yaml` with
+   the XML from step 2 as the contents.
+4. Edit your `homeserver.yaml` to include:
+   ```yaml
+   saml2_config:
+     sp_config:
+       allow_unknown_attributes: true # Works around a bug with AVA Hashes: https://github.com/IdentityPython/pysaml2/issues/388
+       metadata:
+         local: ["samling.xml"]
+   ```
+5. 
Run `apt-get install xmlsec1` and `pip install --upgrade --force 'pysaml2>=4.5.0'` to ensure + the dependencies are installed and ready to go. +6. Restart Synapse. + +Then in Riot: + +1. Visit the login page with a Riot pointing at your homeserver. +2. Click the Single Sign-On button. +3. On the samling page, enter a Name Identifier and add a SAML Attribute for `uid=your_localpart`. + The response must also be signed. +4. Click "Next". +5. Click "Post Response" (change nothing). +6. You should be logged in. + +If you try and repeat this process, you may be automatically logged in using the information you +gave previously. To fix this, open your developer console (`F12` or `Ctrl+Shift+I`) while on the +samling page and clear the site data. In Chrome, this will be a button on the Application tab. -- cgit 1.4.1 From 785cbd3999ab011440b453e07992d3b0c92a4059 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 12:07:03 +0100 Subject: Make the sample saml config closer to our standards It' still not great, thanks to the nested dictionaries, but it's better. --- docs/sample_config.yaml | 110 ++++++++++++++++++++------------------- synapse/config/saml2_config.py | 113 ++++++++++++++++++++++------------------- 2 files changed, 121 insertions(+), 102 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 0c6be30e51..8cfc5c312a 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1031,12 +1031,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # Enable SAML2 for registration and login. Uses pysaml2. # -# `sp_config` is the configuration for the pysaml2 Service Provider. -# See pysaml2 docs for format of config. +# At least one of `sp_config` or `config_path` must be set in this section to +# enable SAML login. # -# Default values will be used for the 'entityid' and 'service' settings, -# so it is not normally necessary to specify them unless you need to -# override them. +# (You will probably also want to set the following options to `false` to +# disable the regular login/registration flows: +# * enable_registration +# * password_config.enabled # # Once SAML support is enabled, a metadata file will be exposed at # https://:/_matrix/saml2/metadata.xml, which you may be able to @@ -1044,52 +1045,59 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # the IdP to use an ACS location of # https://:/_matrix/saml2/authn_response. # -#saml2_config: -# sp_config: -# # point this to the IdP's metadata. You can use either a local file or -# # (preferably) a URL. -# metadata: -# #local: ["saml2/idp.xml"] -# remote: -# - url: https://our_idp/metadata.xml -# -# # By default, the user has to go to our login page first. If you'd like to -# # allow IdP-initiated login, set 'allow_unsolicited: True' in a -# # 'service.sp' section: -# # -# #service: -# # sp: -# # allow_unsolicited: True -# -# # The examples below are just used to generate our metadata xml, and you -# # may well not need it, depending on your setup. Alternatively you -# # may need a whole lot more detail - see the pysaml2 docs! 
-# -# description: ["My awesome SP", "en"] -# name: ["Test SP", "en"] -# -# organization: -# name: Example com -# display_name: -# - ["Example co", "en"] -# url: "http://example.com" -# -# contact_person: -# - given_name: Bob -# sur_name: "the Sysadmin" -# email_address": ["admin@example.com"] -# contact_type": technical -# -# # Instead of putting the config inline as above, you can specify a -# # separate pysaml2 configuration file: -# # -# config_path: "CONFDIR/sp_conf.py" -# -# # the lifetime of a SAML session. This defines how long a user has to -# # complete the authentication process, if allow_unsolicited is unset. -# # The default is 5 minutes. -# # -# # saml_session_lifetime: 5m +saml2_config: + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. + # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # By default, the user has to go to our login page first. If you'd like + # # to allow IdP-initiated login, set 'allow_unsolicited: True' in a + # # 'service.sp' section: + # # + # #service: + # # sp: + # # allow_unsolicited: true + # + # # The examples below are just used to generate our metadata xml, and you + # # may well not need them, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! + # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical + + # Instead of putting the config inline as above, you can specify a + # separate pysaml2 configuration file: + # + #config_path: "CONFDIR/sp_conf.py" + + # the lifetime of a SAML session. This defines how long a user has to + # complete the authentication process, if allow_unsolicited is unset. + # The default is 5 minutes. + # + #saml_session_lifetime: 5m diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index 6a8161547a..c46ac087db 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -26,6 +26,9 @@ class SAML2Config(Config): if not saml2_config or not saml2_config.get("enabled", True): return + if not saml2_config.get("sp_config") and not saml2_config.get("config_path"): + return + try: check_requirements("saml2") except DependencyException as e: @@ -76,12 +79,13 @@ class SAML2Config(Config): return """\ # Enable SAML2 for registration and login. Uses pysaml2. # - # `sp_config` is the configuration for the pysaml2 Service Provider. - # See pysaml2 docs for format of config. + # At least one of `sp_config` or `config_path` must be set in this section to + # enable SAML login. # - # Default values will be used for the 'entityid' and 'service' settings, - # so it is not normally necessary to specify them unless you need to - # override them. 
+ # (You will probably also want to set the following options to `false` to + # disable the regular login/registration flows: + # * enable_registration + # * password_config.enabled # # Once SAML support is enabled, a metadata file will be exposed at # https://:/_matrix/saml2/metadata.xml, which you may be able to @@ -89,52 +93,59 @@ class SAML2Config(Config): # the IdP to use an ACS location of # https://:/_matrix/saml2/authn_response. # - #saml2_config: - # sp_config: - # # point this to the IdP's metadata. You can use either a local file or - # # (preferably) a URL. - # metadata: - # #local: ["saml2/idp.xml"] - # remote: - # - url: https://our_idp/metadata.xml - # - # # By default, the user has to go to our login page first. If you'd like to - # # allow IdP-initiated login, set 'allow_unsolicited: True' in a - # # 'service.sp' section: - # # - # #service: - # # sp: - # # allow_unsolicited: True - # - # # The examples below are just used to generate our metadata xml, and you - # # may well not need it, depending on your setup. Alternatively you - # # may need a whole lot more detail - see the pysaml2 docs! - # - # description: ["My awesome SP", "en"] - # name: ["Test SP", "en"] - # - # organization: - # name: Example com - # display_name: - # - ["Example co", "en"] - # url: "http://example.com" - # - # contact_person: - # - given_name: Bob - # sur_name: "the Sysadmin" - # email_address": ["admin@example.com"] - # contact_type": technical - # - # # Instead of putting the config inline as above, you can specify a - # # separate pysaml2 configuration file: - # # - # config_path: "%(config_dir_path)s/sp_conf.py" - # - # # the lifetime of a SAML session. This defines how long a user has to - # # complete the authentication process, if allow_unsolicited is unset. - # # The default is 5 minutes. - # # - # # saml_session_lifetime: 5m + saml2_config: + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. + # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # By default, the user has to go to our login page first. If you'd like + # # to allow IdP-initiated login, set 'allow_unsolicited: True' in a + # # 'service.sp' section: + # # + # #service: + # # sp: + # # allow_unsolicited: true + # + # # The examples below are just used to generate our metadata xml, and you + # # may well not need them, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! + # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical + + # Instead of putting the config inline as above, you can specify a + # separate pysaml2 configuration file: + # + #config_path: "%(config_dir_path)s/sp_conf.py" + + # the lifetime of a SAML session. This defines how long a user has to + # complete the authentication process, if allow_unsolicited is unset. + # The default is 5 minutes. 
+ # + #saml_session_lifetime: 5m """ % { "config_dir_path": config_dir_path } -- cgit 1.4.1 From a8ac40445c98b9e1fc2538d7d4ec49c80b0298ac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 15:20:49 +0100 Subject: Record mappings from saml users in an external table We want to assign unique mxids to saml users based on an incrementing suffix. For that to work, we need to record the allocated mxid in a separate table. --- docs/sample_config.yaml | 26 ++++++ synapse/config/saml2_config.py | 78 +++++++++++++++- synapse/handlers/saml_handler.py | 103 +++++++++++++++++++-- synapse/rest/client/v1/login.py | 14 +++ synapse/storage/registration.py | 41 ++++++++ .../storage/schema/delta/56/user_external_ids.sql | 24 +++++ 6 files changed, 276 insertions(+), 10 deletions(-) create mode 100644 synapse/storage/schema/delta/56/user_external_ids.sql diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 8cfc5c312a..9021fe2cb8 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1099,6 +1099,32 @@ saml2_config: # #saml_session_lifetime: 5m + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn + # Enable CAS for registration and login. diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index c46ac087db..a022470702 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -12,7 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import re + from synapse.python_dependencies import DependencyException, check_requirements +from synapse.types import ( + map_username_to_mxid_localpart, + mxid_localpart_allowed_characters, +) from ._base import Config, ConfigError @@ -36,6 +42,14 @@ class SAML2Config(Config): self.saml2_enabled = True + self.saml2_mxid_source_attribute = saml2_config.get( + "mxid_source_attribute", "uid" + ) + + self.saml2_grandfathered_mxid_source_attribute = saml2_config.get( + "grandfathered_mxid_source_attribute", "uid" + ) + import saml2.config self.saml2_sp_config = saml2.config.SPConfig() @@ -51,6 +65,12 @@ class SAML2Config(Config): saml2_config.get("saml_session_lifetime", "5m") ) + mapping = saml2_config.get("mxid_mapping", "hexencode") + try: + self.saml2_mxid_mapper = MXID_MAPPER_MAP[mapping] + except KeyError: + raise ConfigError("%s is not a known mxid_mapping" % (mapping,)) + def _default_saml_config_dict(self): import saml2 @@ -58,6 +78,13 @@ class SAML2Config(Config): if public_baseurl is None: raise ConfigError("saml2_config requires a public_baseurl to be set") + required_attributes = {"uid", self.saml2_mxid_source_attribute} + + optional_attributes = {"displayName"} + if self.saml2_grandfathered_mxid_source_attribute: + optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute) + optional_attributes -= required_attributes + metadata_url = public_baseurl + "_matrix/saml2/metadata.xml" response_url = public_baseurl + "_matrix/saml2/authn_response" return { @@ -69,8 +96,9 @@ class SAML2Config(Config): (response_url, saml2.BINDING_HTTP_POST) ] }, - "required_attributes": ["uid"], - "optional_attributes": ["mail", "surname", "givenname"], + "required_attributes": list(required_attributes), + "optional_attributes": list(optional_attributes), + # "name_id_format": saml2.saml.NAMEID_FORMAT_PERSISTENT, } }, } @@ -146,6 +174,52 @@ class SAML2Config(Config): # The default is 5 minutes. # #saml_session_lifetime: 5m + + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. 
+ # + #grandfathered_mxid_source_attribute: upn """ % { "config_dir_path": config_dir_path } + + +DOT_REPLACE_PATTERN = re.compile( + ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),)) +) + + +def dot_replace_for_mxid(username: str) -> str: + username = username.lower() + username = DOT_REPLACE_PATTERN.sub(".", username) + + # regular mxids aren't allowed to start with an underscore either + username = re.sub("^_", "", username) + return username + + +MXID_MAPPER_MAP = { + "hexencode": map_username_to_mxid_localpart, + "dotreplace": dot_replace_for_mxid, +} diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index a1ce6929cf..5fa8272dc9 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -21,6 +21,8 @@ from saml2.client import Saml2Client from synapse.api.errors import SynapseError from synapse.http.servlet import parse_string from synapse.rest.client.v1.login import SSOAuthHandler +from synapse.types import UserID, map_username_to_mxid_localpart +from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -29,12 +31,26 @@ class SamlHandler: def __init__(self, hs): self._saml_client = Saml2Client(hs.config.saml2_sp_config) self._sso_auth_handler = SSOAuthHandler(hs) + self._registration_handler = hs.get_registration_handler() + + self._clock = hs.get_clock() + self._datastore = hs.get_datastore() + self._hostname = hs.hostname + self._saml2_session_lifetime = hs.config.saml2_session_lifetime + self._mxid_source_attribute = hs.config.saml2_mxid_source_attribute + self._grandfathered_mxid_source_attribute = ( + hs.config.saml2_grandfathered_mxid_source_attribute + ) + self._mxid_mapper = hs.config.saml2_mxid_mapper + + # identifier for the external_ids table + self._auth_provider_id = "saml" # a map from saml session id to Saml2SessionData object self._outstanding_requests_dict = {} - self._clock = hs.get_clock() - self._saml2_session_lifetime = hs.config.saml2_session_lifetime + # a lock on the mappings + self._mapping_lock = Linearizer(name="saml_mapping", clock=self._clock) def handle_redirect_request(self, client_redirect_url): """Handle an incoming request to /login/sso/redirect @@ -60,7 +76,7 @@ class SamlHandler: # this shouldn't happen! raise Exception("prepare_for_authenticate didn't return a Location header") - def handle_saml_response(self, request): + async def handle_saml_response(self, request): """Handle an incoming request to /_matrix/saml2/authn_response Args: @@ -77,6 +93,10 @@ class SamlHandler: # the dict. 
self.expire_sessions() + user_id = await self._map_saml_response_to_user(resp_bytes) + self._sso_auth_handler.complete_sso_login(user_id, request, relay_state) + + async def _map_saml_response_to_user(self, resp_bytes): try: saml2_auth = self._saml_client.parse_authn_request_response( resp_bytes, @@ -91,18 +111,85 @@ class SamlHandler: logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - if "uid" not in saml2_auth.ava: + try: + remote_user_id = saml2_auth.ava["uid"][0] + except KeyError: logger.warning("SAML2 response lacks a 'uid' attestation") raise SynapseError(400, "uid not in SAML2 response") + try: + mxid_source = saml2_auth.ava[self._mxid_source_attribute][0] + except KeyError: + logger.warning( + "SAML2 response lacks a '%s' attestation", self._mxid_source_attribute + ) + raise SynapseError( + 400, "%s not in SAML2 response" % (self._mxid_source_attribute,) + ) + self._outstanding_requests_dict.pop(saml2_auth.in_response_to, None) - username = saml2_auth.ava["uid"][0] displayName = saml2_auth.ava.get("displayName", [None])[0] - return self._sso_auth_handler.on_successful_auth( - username, request, relay_state, user_display_name=displayName - ) + with (await self._mapping_lock.queue(self._auth_provider_id)): + # first of all, check if we already have a mapping for this user + logger.info( + "Looking for existing mapping for user %s:%s", + self._auth_provider_id, + remote_user_id, + ) + registered_user_id = await self._datastore.get_user_by_external_id( + self._auth_provider_id, remote_user_id + ) + if registered_user_id is not None: + logger.info("Found existing mapping %s", registered_user_id) + return registered_user_id + + # backwards-compatibility hack: see if there is an existing user with a + # suitable mapping from the uid + if ( + self._grandfathered_mxid_source_attribute + and self._grandfathered_mxid_source_attribute in saml2_auth.ava + ): + attrval = saml2_auth.ava[self._grandfathered_mxid_source_attribute][0] + user_id = UserID( + map_username_to_mxid_localpart(attrval), self._hostname + ).to_string() + logger.info( + "Looking for existing account based on mapped %s %s", + self._grandfathered_mxid_source_attribute, + user_id, + ) + + users = await self._datastore.get_users_by_id_case_insensitive(user_id) + if users: + registered_user_id = list(users.keys())[0] + logger.info("Grandfathering mapping to %s", registered_user_id) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id + + # figure out a new mxid for this user + base_mxid_localpart = self._mxid_mapper(mxid_source) + + suffix = 0 + while True: + localpart = base_mxid_localpart + (str(suffix) if suffix else "") + if not await self._datastore.get_users_by_id_case_insensitive( + UserID(localpart, self._hostname).to_string() + ): + break + suffix += 1 + logger.info("Allocating mxid for new user with localpart %s", localpart) + + registered_user_id = await self._registration_handler.register_user( + localpart=localpart, default_display_name=displayName + ) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id def expire_sessions(self): expire_before = self._clock.time_msec() - self._saml2_session_lifetime diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 5762b9fd06..eeaa72b205 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py 
@@ -29,6 +29,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.client.v2_alpha._base import client_patterns from synapse.rest.well_known import WellKnownBuilder from synapse.types import UserID, map_username_to_mxid_localpart @@ -507,6 +508,19 @@ class SSOAuthHandler(object): localpart=localpart, default_display_name=user_display_name ) + self.complete_sso_login(registered_user_id, request, client_redirect_url) + + def complete_sso_login( + self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str + ): + """Having figured out a mxid for this user, complete the HTTP request + + Args: + registered_user_id: + request: + client_redirect_url: + """ + login_token = self._macaroon_gen.generate_short_term_login_token( registered_user_id ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 55e4e84d71..1e3c2148f6 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -22,6 +22,7 @@ from six import iterkeys from six.moves import range from twisted.internet import defer +from twisted.internet.defer import Deferred from synapse.api.constants import UserTypes from synapse.api.errors import Codes, StoreError, ThreepidValidationError @@ -337,6 +338,26 @@ class RegistrationWorkerStore(SQLBaseStore): return self.runInteraction("get_users_by_id_case_insensitive", f) + async def get_user_by_external_id( + self, auth_provider: str, external_id: str + ) -> str: + """Look up a user by their external auth id + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + + Returns: + str|None: the mxid of the user, or None if they are not known + """ + return await self._simple_select_one_onecol( + table="user_external_ids", + keyvalues={"auth_provider": auth_provider, "external_id": external_id}, + retcol="user_id", + allow_none=True, + desc="get_user_by_external_id", + ) + @defer.inlineCallbacks def count_all_users(self): """Counts all users registered on the homeserver.""" @@ -848,6 +869,26 @@ class RegistrationStore( self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) + def record_user_external_id( + self, auth_provider: str, external_id: str, user_id: str + ) -> Deferred: + """Record a mapping from an external user id to a mxid + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + user_id: complete mxid that it is mapped to + """ + return self._simple_insert( + table="user_external_ids", + values={ + "auth_provider": auth_provider, + "external_id": external_id, + "user_id": user_id, + }, + desc="record_user_external_id", + ) + def user_set_password_hash(self, user_id, password_hash): """ NB. This does *not* evict any cache because the one use for this diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/schema/delta/56/user_external_ids.sql new file mode 100644 index 0000000000..91390c4527 --- /dev/null +++ b/synapse/storage/schema/delta/56/user_external_ids.sql @@ -0,0 +1,24 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * a table which records mappings from external auth providers to mxids + */ +CREATE TABLE IF NOT EXISTS user_external_ids ( + auth_provider TEXT NOT NULL, + external_id TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (auth_provider, external_id) +); -- cgit 1.4.1 From b9d57502da8ae4e11523a155e0fd608433e1025d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 16:06:03 +0100 Subject: changelog --- changelog.d/6037.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6037.feature diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature new file mode 100644 index 0000000000..95d82bd4d8 --- /dev/null +++ b/changelog.d/6037.feature @@ -0,0 +1 @@ +Handle userid clashes when authenticating via SAML by appending an integer suffix. \ No newline at end of file -- cgit 1.4.1 From 850dcfd2d3a1d689042fb38c8a16b652244068c2 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 14 Sep 2019 04:58:38 +1000 Subject: Fix well-known lookups with the federation certificate whitelist (#5997) --- changelog.d/5996.bugfix | 1 + synapse/config/tls.py | 9 ++++- synapse/crypto/context_factory.py | 26 +++++++------- synapse/http/federation/matrix_federation_agent.py | 2 +- tests/config/test_tls.py | 40 ++++++++++++++++++++++ 5 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 changelog.d/5996.bugfix diff --git a/changelog.d/5996.bugfix b/changelog.d/5996.bugfix new file mode 100644 index 0000000000..05e31faaa2 --- /dev/null +++ b/changelog.d/5996.bugfix @@ -0,0 +1 @@ +federation_certificate_verification_whitelist now will not cause TypeErrors to be raised (a regression in 1.3). Additionally, it now supports internationalised domain names in their non-canonical representation. 
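As context for the diff that follows: whitelist entries must already be in their ASCII (punycode) form, and each glob is compiled to a regex that is later matched against the already-IDNA-encoded hostname. Here is a minimal, self-contained sketch of that check, with the stdlib's `fnmatch.translate` standing in for synapse's own `glob_to_regex` helper (so the wildcard semantics are only approximate):

```python
import fnmatch
import re


def compile_whitelist(entries):
    """Compile glob-style whitelist entries, rejecting non-ASCII (IDNA) names."""
    compiled = []
    for entry in entries:
        try:
            entry.encode("ascii")
        except UnicodeEncodeError:
            raise ValueError(
                "IDNA domain names are not allowed in the whitelist: %s" % (entry,)
            )
        # glob -> regex; synapse uses its own glob_to_regex for this step
        compiled.append(re.compile(fnmatch.translate(entry)))
    return compiled


patterns = compile_whitelist(["example.com", "*.xn--eckwd4c7c.xn--zckzah"])

# the punycoded wildcard matches a punycoded hostname...
assert any(p.match("foo.xn--eckwd4c7c.xn--zckzah") for p in patterns)

# ...while a raw (non-punycode) IDN entry is rejected at config time
try:
    compile_whitelist(["*.ドメイン.テスト"])
except ValueError as e:
    print(e)
```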
diff --git a/synapse/config/tls.py b/synapse/config/tls.py index c0148aa95c..fc47ba3e9a 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -110,8 +110,15 @@ class TlsConfig(Config): # Support globs (*) in whitelist values self.federation_certificate_verification_whitelist = [] for entry in fed_whitelist_entries: + try: + entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii")) + except UnicodeEncodeError: + raise ConfigError( + "IDNA domain names are not allowed in the " + "federation_certificate_verification_whitelist: %s" % (entry,) + ) + # Convert globs to regex - entry_regex = glob_to_regex(entry) self.federation_certificate_verification_whitelist.append(entry_regex) # List of custom certificate authorities for federation traffic validation diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 06e63a96b5..e93f0b3705 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -15,7 +15,6 @@ import logging -import idna from service_identity import VerificationError from service_identity.pyopenssl import verify_hostname, verify_ip_address from zope.interface import implementer @@ -114,14 +113,20 @@ class ClientTLSOptionsFactory(object): self._no_verify_ssl_context = self._no_verify_ssl.getContext() self._no_verify_ssl_context.set_info_callback(self._context_info_cb) - def get_options(self, host): + def get_options(self, host: bytes): + + # IPolicyForHTTPS.get_options takes bytes, but we want to compare + # against the str whitelist. The hostnames in the whitelist are already + # IDNA-encoded like the hosts will be here. + ascii_host = host.decode("ascii") + # Check if certificate verification has been enabled should_verify = self._config.federation_verify_certificates # Check if we've disabled certificate verification for this host if should_verify: for regex in self._config.federation_certificate_verification_whitelist: - if regex.match(host): + if regex.match(ascii_host): should_verify = False break @@ -162,7 +167,7 @@ class SSLClientConnectionCreator(object): Replaces twisted.internet.ssl.ClientTLSOptions """ - def __init__(self, hostname, ctx, verify_certs): + def __init__(self, hostname: bytes, ctx, verify_certs: bool): self._ctx = ctx self._verifier = ConnectionVerifier(hostname, verify_certs) @@ -190,21 +195,16 @@ class ConnectionVerifier(object): # This code is based on twisted.internet.ssl.ClientTLSOptions. - def __init__(self, hostname, verify_certs): + def __init__(self, hostname: bytes, verify_certs): self._verify_certs = verify_certs - if isIPAddress(hostname) or isIPv6Address(hostname): - self._hostnameBytes = hostname.encode("ascii") + _decoded = hostname.decode("ascii") + if isIPAddress(_decoded) or isIPv6Address(_decoded): self._is_ip_address = True else: - # twisted's ClientTLSOptions falls back to the stdlib impl here if - # idna is not installed, but points out that lacks support for - # IDNA2008 (http://bugs.python.org/issue17305). - # - # We can rely on having idna. 
- self._hostnameBytes = idna.encode(hostname) self._is_ip_address = False + self._hostnameBytes = hostname self._hostnameASCII = self._hostnameBytes.decode("ascii") def verify_context_info_cb(self, ssl_connection, where): diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index feae7de5be..647d26dc56 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -217,7 +217,7 @@ class MatrixHostnameEndpoint(object): self._tls_options = None else: self._tls_options = tls_client_options_factory.get_options( - self._parsed_uri.host.decode("ascii") + self._parsed_uri.host ) self._srv_resolver = srv_resolver diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index 8e0c4b9533..b02780772a 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -16,6 +16,7 @@ import os +import idna import yaml from OpenSSL import SSL @@ -235,3 +236,42 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= ) self.assertTrue(conf.acme_enabled) + + def test_whitelist_idna_failure(self): + """ + The federation certificate whitelist will not allow IDNA domain names. + """ + config = { + "federation_certificate_verification_whitelist": [ + "example.com", + "*.ドメイン.テスト", + ] + } + t = TestConfig() + e = self.assertRaises( + ConfigError, t.read_config, config, config_dir_path="", data_dir_path="" + ) + self.assertIn("IDNA domain names", str(e)) + + def test_whitelist_idna_result(self): + """ + The federation certificate whitelist will match on IDNA encoded names. + """ + config = { + "federation_certificate_verification_whitelist": [ + "example.com", + "*.xn--eckwd4c7c.xn--zckzah", + ] + } + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + cf = ClientTLSOptionsFactory(t) + + # Not in the whitelist + opts = cf.get_options(b"notexample.com") + self.assertTrue(opts._verifier._verify_certs) + + # Caught by the wildcard + opts = cf.get_options(idna.encode("テスト.ドメイン.テスト")) + self.assertFalse(opts._verifier._verify_certs) -- cgit 1.4.1 From 1e19ce00bff8d67168d39201cdf9424f7b2f22f6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 17 Sep 2019 11:41:54 +0100 Subject: Add 'failure_ts' column to 'destinations' table (#6016) Track the time that a server started failing at, for general analysis purposes. --- changelog.d/6016.misc | 1 + .../schema/delta/56/destinations_failure_ts.sql | 25 ++++ synapse/storage/transactions.py | 23 ++-- synapse/util/retryutils.py | 16 ++- tests/handlers/test_typing.py | 7 +- tests/storage/test_transactions.py | 8 +- tests/util/test_retryutils.py | 127 +++++++++++++++++++++ 7 files changed, 195 insertions(+), 12 deletions(-) create mode 100644 changelog.d/6016.misc create mode 100644 synapse/storage/schema/delta/56/destinations_failure_ts.sql create mode 100644 tests/util/test_retryutils.py diff --git a/changelog.d/6016.misc b/changelog.d/6016.misc new file mode 100644 index 0000000000..91cf164714 --- /dev/null +++ b/changelog.d/6016.misc @@ -0,0 +1 @@ +Add a 'failure_ts' column to the 'destinations' database table. 
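Before the schema and storage changes below, a note on what the new column means. The bookkeeping that `RetryDestinationLimiter` does with it is small enough to sketch standalone; this is a simplified toy model (the real limiter in `synapse.util.retryutils` also randomises the interval and caps it at a maximum, and the numeric values here are illustrative assumptions, not the library's own):

```python
import time

MIN_RETRY_INTERVAL = 10 * 60 * 1000  # illustrative values only
RETRY_MULTIPLIER = 5


class BackoffState:
    """Per-destination retry state, mirroring the three columns on destinations."""

    def __init__(self):
        self.failure_ts = None   # when the destination *started* failing (ms)
        self.retry_last_ts = 0   # when we last tried (ms)
        self.retry_interval = 0  # how long to wait before the next try (ms)

    def on_success(self):
        # any successful request clears the backoff state entirely
        self.failure_ts = None
        self.retry_last_ts = 0
        self.retry_interval = 0

    def on_failure(self, now_ms=None):
        now_ms = int(time.time() * 1000) if now_ms is None else now_ms
        self.retry_last_ts = now_ms
        if self.failure_ts is None:
            # only the first failure in a run sets failure_ts, so it records
            # when the current outage began
            self.failure_ts = now_ms
        self.retry_interval = (
            MIN_RETRY_INTERVAL
            if self.retry_interval == 0
            else self.retry_interval * RETRY_MULTIPLIER
        )
```

Clearing the state on any success is what makes `failure_ts` read as "when the current outage began" rather than "the first failure ever", which is the property the backfill `UPDATE` in the schema delta below approximates for existing rows.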
diff --git a/synapse/storage/schema/delta/56/destinations_failure_ts.sql b/synapse/storage/schema/delta/56/destinations_failure_ts.sql new file mode 100644 index 0000000000..f00889290b --- /dev/null +++ b/synapse/storage/schema/delta/56/destinations_failure_ts.sql @@ -0,0 +1,25 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Record the timestamp when a given server started failing + */ +ALTER TABLE destinations ADD failure_ts BIGINT; + +/* as a rough approximation, we assume that the server started failing at + * retry_interval before the last retry + */ +UPDATE destinations SET failure_ts = retry_last_ts - retry_interval + WHERE retry_last_ts > 0; diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index d81ace0ece..289c117396 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -165,7 +165,7 @@ class TransactionStore(SQLBaseStore): txn, table="destinations", keyvalues={"destination": destination}, - retcols=("destination", "retry_last_ts", "retry_interval"), + retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"), allow_none=True, ) @@ -174,12 +174,15 @@ class TransactionStore(SQLBaseStore): else: return None - def set_destination_retry_timings(self, destination, retry_last_ts, retry_interval): + def set_destination_retry_timings( + self, destination, failure_ts, retry_last_ts, retry_interval + ): """Sets the current retry timings for a given destination. Both timings should be zero if retrying is no longer occuring. Args: destination (str) + failure_ts (int|None) - when the server started failing (ms since epoch) retry_last_ts (int) - time of last retry attempt in unix epoch ms retry_interval (int) - how long until next retry in ms """ @@ -189,12 +192,13 @@ class TransactionStore(SQLBaseStore): "set_destination_retry_timings", self._set_destination_retry_timings, destination, + failure_ts, retry_last_ts, retry_interval, ) def _set_destination_retry_timings( - self, txn, destination, retry_last_ts, retry_interval + self, txn, destination, failure_ts, retry_last_ts, retry_interval ): if self.database_engine.can_native_upsert: @@ -202,9 +206,12 @@ class TransactionStore(SQLBaseStore): # resetting it) or greater than the existing retry interval. sql = """ - INSERT INTO destinations (destination, retry_last_ts, retry_interval) - VALUES (?, ?, ?) + INSERT INTO destinations ( + destination, failure_ts, retry_last_ts, retry_interval + ) + VALUES (?, ?, ?, ?) 
ON CONFLICT (destination) DO UPDATE SET + failure_ts = EXCLUDED.failure_ts, retry_last_ts = EXCLUDED.retry_last_ts, retry_interval = EXCLUDED.retry_interval WHERE @@ -212,7 +219,7 @@ class TransactionStore(SQLBaseStore): OR destinations.retry_interval < EXCLUDED.retry_interval """ - txn.execute(sql, (destination, retry_last_ts, retry_interval)) + txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval)) return @@ -225,7 +232,7 @@ class TransactionStore(SQLBaseStore): txn, table="destinations", keyvalues={"destination": destination}, - retcols=("retry_last_ts", "retry_interval"), + retcols=("failure_ts", "retry_last_ts", "retry_interval"), allow_none=True, ) @@ -235,6 +242,7 @@ class TransactionStore(SQLBaseStore): table="destinations", values={ "destination": destination, + "failure_ts": failure_ts, "retry_last_ts": retry_last_ts, "retry_interval": retry_interval, }, @@ -245,6 +253,7 @@ class TransactionStore(SQLBaseStore): "destinations", keyvalues={"destination": destination}, updatevalues={ + "failure_ts": failure_ts, "retry_last_ts": retry_last_ts, "retry_interval": retry_interval, }, diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index b740913b58..a5f2fbef5c 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -80,11 +80,13 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs) # We aren't ready to retry that destination. raise """ + failure_ts = None retry_last_ts, retry_interval = (0, 0) retry_timings = yield store.get_destination_retry_timings(destination) if retry_timings: + failure_ts = retry_timings["failure_ts"] retry_last_ts, retry_interval = ( retry_timings["retry_last_ts"], retry_timings["retry_interval"], @@ -108,6 +110,7 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs) destination, clock, store, + failure_ts, retry_interval, backoff_on_failure=backoff_on_failure, **kwargs @@ -120,6 +123,7 @@ class RetryDestinationLimiter(object): destination, clock, store, + failure_ts, retry_interval, backoff_on_404=False, backoff_on_failure=True, @@ -133,6 +137,8 @@ class RetryDestinationLimiter(object): destination (str) clock (Clock) store (DataStore) + failure_ts (int|None): when this destination started failing (in ms since + the epoch), or zero if the last request was successful retry_interval (int): The next retry interval taken from the database in milliseconds, or zero if the last request was successful. 
@@ -145,6 +151,7 @@ class RetryDestinationLimiter(object): self.store = store self.destination = destination + self.failure_ts = failure_ts self.retry_interval = retry_interval self.backoff_on_404 = backoff_on_404 self.backoff_on_failure = backoff_on_failure @@ -186,6 +193,7 @@ class RetryDestinationLimiter(object): logger.debug( "Connection to %s was successful; clearing backoff", self.destination ) + self.failure_ts = None retry_last_ts = 0 self.retry_interval = 0 elif not self.backoff_on_failure: @@ -211,11 +219,17 @@ class RetryDestinationLimiter(object): ) retry_last_ts = int(self.clock.time_msec()) + if self.failure_ts is None: + self.failure_ts = retry_last_ts + @defer.inlineCallbacks def store_retry_timings(): try: yield self.store.set_destination_retry_timings( - self.destination, retry_last_ts, self.retry_interval + self.destination, + self.failure_ts, + retry_last_ts, + self.retry_interval, ) except Exception: logger.exception("Failed to store destination_retry_timings") diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 5d5e324df2..1f2ef5d01f 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -99,7 +99,12 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] self.datastore = hs.get_datastore() - retry_timings_res = {"destination": "", "retry_last_ts": 0, "retry_interval": 0} + retry_timings_res = { + "destination": "", + "retry_last_ts": 0, + "retry_interval": 0, + "failure_ts": None, + } self.datastore.get_destination_retry_timings.return_value = defer.succeed( retry_timings_res ) diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 14169afa96..a771d5af29 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -29,17 +29,19 @@ class TransactionStoreTestCase(HomeserverTestCase): r = self.get_success(d) self.assertIsNone(r) - d = self.store.set_destination_retry_timings("example.com", 50, 100) + d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) self.get_success(d) d = self.store.get_destination_retry_timings("example.com") r = self.get_success(d) - self.assert_dict({"retry_last_ts": 50, "retry_interval": 100}, r) + self.assert_dict( + {"retry_last_ts": 50, "retry_interval": 100, "failure_ts": 1000}, r + ) def test_initial_set_transactions(self): """Tests that we can successfully set the destination retries (there was a bug around invalidating the cache that broke this) """ - d = self.store.set_destination_retry_timings("example.com", 50, 100) + d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) self.get_success(d) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py new file mode 100644 index 0000000000..9e348694ad --- /dev/null +++ b/tests/util/test_retryutils.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from synapse.util.retryutils import ( + MIN_RETRY_INTERVAL, + RETRY_MULTIPLIER, + NotRetryingDestination, + get_retry_limiter, +) + +from tests.unittest import HomeserverTestCase + + +class RetryLimiterTestCase(HomeserverTestCase): + def test_new_destination(self): + """A happy-path case with a new destination and a successful operation""" + store = self.hs.get_datastore() + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + # advance the clock a bit before making the request + self.pump(1) + + with limiter: + pass + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertIsNone(new_timings) + + def test_limiter(self): + """General test case which walks through the process of a failing request""" + store = self.hs.get_datastore() + + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + try: + with limiter: + self.pump(1) + failure_ts = self.clock.time_msec() + raise AssertionError("argh") + except AssertionError: + pass + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertEqual(new_timings["failure_ts"], failure_ts) + self.assertEqual(new_timings["retry_last_ts"], failure_ts) + self.assertEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL) + + # now if we try again we should get a failure + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + self.failureResultOf(d, NotRetryingDestination) + + # + # advance the clock and try again + # + + self.pump(MIN_RETRY_INTERVAL) + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + try: + with limiter: + self.pump(1) + retry_ts = self.clock.time_msec() + raise AssertionError("argh") + except AssertionError: + pass + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertEqual(new_timings["failure_ts"], failure_ts) + self.assertEqual(new_timings["retry_last_ts"], retry_ts) + self.assertGreaterEqual( + new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5 + ) + self.assertLessEqual( + new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0 + ) + + # + # one more go, with success + # + self.pump(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + with limiter: + self.pump(1) + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertIsNone(new_timings) -- cgit 1.4.1 From 70c52821ce9e755e4a5c3081510fb1260f609ee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Sep 2019 12:41:23 +0100 Subject: Fix race condition in room stats. (#6029) Broke in #5971 Basically the bug is that if get_current_state_deltas returns no new updates and we then take the max pos, its possible that we miss an update that happens in between the two calls. (e.g. 
get_current_state_deltas looks up to stream pos 5, then an event persists and so getting the max stream pos returns 6, meaning that next time we check for things with a stream pos bigger than 6) --- changelog.d/6029.bugfix | 1 + synapse/handlers/stats.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6029.bugfix diff --git a/changelog.d/6029.bugfix b/changelog.d/6029.bugfix new file mode 100644 index 0000000000..9ea095103b --- /dev/null +++ b/changelog.d/6029.bugfix @@ -0,0 +1 @@ +Fix room and user stats tracking. diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 3c265f3718..cbac7c347a 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -84,6 +84,13 @@ class StatsHandler(StateDeltasHandler): # Loop round handling deltas until we're up to date while True: + # Be sure to read the max stream_ordering *before* checking if there are any outstanding + # deltas, since there is otherwise a chance that we could miss updates which arrive + # after we check the deltas. + room_max_stream_ordering = yield self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: + break + deltas = yield self.store.get_current_state_deltas(self.pos) if deltas: @@ -94,7 +101,7 @@ class StatsHandler(StateDeltasHandler): else: room_deltas = {} user_deltas = {} - max_pos = yield self.store.get_room_max_stream_ordering() + max_pos = room_max_stream_ordering # Then count deltas for total_events and total_event_bytes. room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes( @@ -117,10 +124,9 @@ class StatsHandler(StateDeltasHandler): stream_id=max_pos, ) - event_processing_positions.labels("stats").set(max_pos) + logger.debug("Handled room stats to %s -> %s", self.pos, max_pos) - if self.pos == max_pos: - break + event_processing_positions.labels("stats").set(max_pos) self.pos = max_pos -- cgit 1.4.1 From 379d2a8c3918557bacdadea6b508bddd1ce20eaf Mon Sep 17 00:00:00 2001 From: dstipp Date: Tue, 17 Sep 2019 07:55:29 -0400 Subject: (#5849) Convert rst to markdown (#6040) Converting some of the rst documentation to markdown. Attempted to preserve whitespace and line breaks to minimize cosmetic change. 
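The commit message does not say how the conversion was performed. For anyone repeating the exercise, a hypothetical batch conversion with pandoc (which has an rst reader and a GitHub-flavoured markdown writer) might look like the sketch below; the output would still need the kind of hand-editing this commit describes, to keep whitespace and line breaks close to the originals:

```python
import subprocess
from pathlib import Path


def convert_docs(docs_dir="docs"):
    """Convert each docs/*.rst to a sibling .md; assumes pandoc is on the PATH."""
    for rst in sorted(Path(docs_dir).glob("*.rst")):
        md = rst.with_suffix(".md")
        subprocess.run(
            ["pandoc", "--from=rst", "--to=gfm", "--output", str(md), str(rst)],
            check=True,
        )
        print("converted", rst, "->", md)


if __name__ == "__main__":
    convert_docs()
```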
--- CONTRIBUTING.rst | 2 +- INSTALL.md | 4 +- README.rst | 6 +- UPGRADE.rst | 2 +- changelog.d/5849.doc | 1 + docs/CAPTCHA_SETUP.md | 31 +++ docs/CAPTCHA_SETUP.rst | 30 --- docs/MSC1711_certificates_FAQ.md | 4 +- docs/README.md | 7 + docs/README.rst | 6 - docs/ancient_architecture_notes.md | 81 ++++++ docs/ancient_architecture_notes.rst | 59 ----- docs/application_services.md | 31 +++ docs/application_services.rst | 35 --- docs/architecture.md | 65 +++++ docs/architecture.rst | 68 ----- docs/code_style.md | 169 ++++++++++++ docs/code_style.rst | 180 ------------- docs/federate.md | 4 +- docs/log_contexts.md | 494 +++++++++++++++++++++++++++++++++++ docs/log_contexts.rst | 498 ------------------------------------ docs/media_repository.md | 30 +++ docs/media_repository.rst | 27 -- docs/metrics-howto.md | 217 ++++++++++++++++ docs/metrics-howto.rst | 285 --------------------- docs/opentracing.md | 93 +++++++ docs/opentracing.rst | 123 --------- docs/password_auth_providers.md | 116 +++++++++ docs/password_auth_providers.rst | 113 -------- docs/postgres.md | 164 ++++++++++++ docs/postgres.rst | 166 ------------ docs/replication.md | 37 +++ docs/replication.rst | 40 --- docs/reverse_proxy.md | 123 +++++++++ docs/reverse_proxy.rst | 112 -------- docs/sample_config.yaml | 12 +- docs/tcp_replication.md | 249 ++++++++++++++++++ docs/tcp_replication.rst | 249 ------------------ docs/turn-howto.md | 123 +++++++++ docs/turn-howto.rst | 127 --------- docs/workers.md | 284 ++++++++++++++++++++ docs/workers.rst | 301 ---------------------- synapse/config/server.py | 12 +- 43 files changed, 2338 insertions(+), 2442 deletions(-) create mode 100644 changelog.d/5849.doc create mode 100644 docs/CAPTCHA_SETUP.md delete mode 100644 docs/CAPTCHA_SETUP.rst create mode 100644 docs/README.md delete mode 100644 docs/README.rst create mode 100644 docs/ancient_architecture_notes.md delete mode 100644 docs/ancient_architecture_notes.rst create mode 100644 docs/application_services.md delete mode 100644 docs/application_services.rst create mode 100644 docs/architecture.md delete mode 100644 docs/architecture.rst create mode 100644 docs/code_style.md delete mode 100644 docs/code_style.rst create mode 100644 docs/log_contexts.md delete mode 100644 docs/log_contexts.rst create mode 100644 docs/media_repository.md delete mode 100644 docs/media_repository.rst create mode 100644 docs/metrics-howto.md delete mode 100644 docs/metrics-howto.rst create mode 100644 docs/opentracing.md delete mode 100644 docs/opentracing.rst create mode 100644 docs/password_auth_providers.md delete mode 100644 docs/password_auth_providers.rst create mode 100644 docs/postgres.md delete mode 100644 docs/postgres.rst create mode 100644 docs/replication.md delete mode 100644 docs/replication.rst create mode 100644 docs/reverse_proxy.md delete mode 100644 docs/reverse_proxy.rst create mode 100644 docs/tcp_replication.md delete mode 100644 docs/tcp_replication.rst create mode 100644 docs/turn-howto.md delete mode 100644 docs/turn-howto.rst create mode 100644 docs/workers.md delete mode 100644 docs/workers.rst diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 94dc650485..620dc88ce2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -56,7 +56,7 @@ Code style All Matrix projects have a well-defined code-style - and sometimes we've even got as far as documenting it... For instance, synapse's code style doc lives -at https://github.com/matrix-org/synapse/tree/master/docs/code_style.rst. 
+at https://github.com/matrix-org/synapse/tree/master/docs/code_style.md. Please ensure your changes match the cosmetic style of the existing project, and **never** mix cosmetic and functional changes in the same commit, as it diff --git a/INSTALL.md b/INSTALL.md index 6bce370ea8..3eb979c362 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -373,7 +373,7 @@ is suitable for local testing, but for any practical use, you will either need to enable a reverse proxy, or configure Synapse to expose an HTTPS port. For information on using a reverse proxy, see -[docs/reverse_proxy.rst](docs/reverse_proxy.rst). +[docs/reverse_proxy.md](docs/reverse_proxy.md). To configure Synapse to expose an HTTPS port, you will need to edit `homeserver.yaml`, as follows: @@ -446,7 +446,7 @@ on your server even if `enable_registration` is `false`. ## Setting up a TURN server For reliable VoIP calls to be routed via this homeserver, you MUST configure -a TURN server. See [docs/turn-howto.rst](docs/turn-howto.rst) for details. +a TURN server. See [docs/turn-howto.md](docs/turn-howto.md) for details. ## URL previews diff --git a/README.rst b/README.rst index bbff8de5ab..2948fd0765 100644 --- a/README.rst +++ b/README.rst @@ -115,7 +115,7 @@ Registering a new user from a client By default, registration of new users via Matrix clients is disabled. To enable it, specify ``enable_registration: true`` in ``homeserver.yaml``. (It is then -recommended to also set up CAPTCHA - see ``_.) +recommended to also set up CAPTCHA - see ``_.) Once ``enable_registration`` is set to ``true``, it is possible to register a user via `riot.im `_ or other Matrix clients. @@ -186,7 +186,7 @@ Almost all installations should opt to use PostreSQL. Advantages include: synapse itself. For information on how to install and use PostgreSQL, please see -`docs/postgres.rst `_. +`docs/postgres.md `_. .. _reverse-proxy: @@ -201,7 +201,7 @@ It is recommended to put a reverse proxy such as doing so is that it means that you can expose the default https port (443) to Matrix clients without needing to run Synapse with root privileges. -For information on configuring one, see ``_. +For information on configuring one, see ``_. Identity Servers ================ diff --git a/UPGRADE.rst b/UPGRADE.rst index dddcd75fda..5aaf804902 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -103,7 +103,7 @@ Upgrading to v1.2.0 =================== Some counter metrics have been renamed, with the old names deprecated. See -`the metrics documentation `_ +`the metrics documentation `_ for details. Upgrading to v1.1.0 diff --git a/changelog.d/5849.doc b/changelog.d/5849.doc new file mode 100644 index 0000000000..fbe62e8633 --- /dev/null +++ b/changelog.d/5849.doc @@ -0,0 +1 @@ +Convert documentation to markdown (from rst) diff --git a/docs/CAPTCHA_SETUP.md b/docs/CAPTCHA_SETUP.md new file mode 100644 index 0000000000..5f9057530b --- /dev/null +++ b/docs/CAPTCHA_SETUP.md @@ -0,0 +1,31 @@ +# Overview +Captcha can be enabled for this home server. This file explains how to do that. +The captcha mechanism used is Google's ReCaptcha. This requires API keys from Google. + +## Getting keys + +Requires a public/private key pair from: + + + +Must be a reCAPTCHA v2 key using the "I'm not a robot" Checkbox option + +## Setting ReCaptcha Keys + +The keys are a config option on the home server config. If they are not +visible, you can generate them via `--generate-config`. 
Set the following value: + + recaptcha_public_key: YOUR_PUBLIC_KEY + recaptcha_private_key: YOUR_PRIVATE_KEY + +In addition, you MUST enable captchas via: + + enable_registration_captcha: true + +## Configuring IP used for auth + +The ReCaptcha API requires that the IP address of the user who solved the +captcha is sent. If the client is connecting through a proxy or load balancer, +it may be required to use the `X-Forwarded-For` (XFF) header instead of the origin +IP address. This can be configured using the `x_forwarded` directive in the +listeners section of the homeserver.yaml configuration file. diff --git a/docs/CAPTCHA_SETUP.rst b/docs/CAPTCHA_SETUP.rst deleted file mode 100644 index 0c22ee4ff6..0000000000 --- a/docs/CAPTCHA_SETUP.rst +++ /dev/null @@ -1,30 +0,0 @@ -Captcha can be enabled for this home server. This file explains how to do that. -The captcha mechanism used is Google's ReCaptcha. This requires API keys from Google. - -Getting keys ------------- -Requires a public/private key pair from: - -https://developers.google.com/recaptcha/ - -Must be a reCAPTCHA v2 key using the "I'm not a robot" Checkbox option - -Setting ReCaptcha Keys ----------------------- -The keys are a config option on the home server config. If they are not -visible, you can generate them via --generate-config. Set the following value:: - - recaptcha_public_key: YOUR_PUBLIC_KEY - recaptcha_private_key: YOUR_PRIVATE_KEY - -In addition, you MUST enable captchas via:: - - enable_registration_captcha: true - -Configuring IP used for auth ----------------------------- -The ReCaptcha API requires that the IP address of the user who solved the -captcha is sent. If the client is connecting through a proxy or load balancer, -it may be required to use the X-Forwarded-For (XFF) header instead of the origin -IP address. This can be configured using the x_forwarded directive in the -listeners section of the homeserver.yaml configuration file. diff --git a/docs/MSC1711_certificates_FAQ.md b/docs/MSC1711_certificates_FAQ.md index 83497380df..80bd1294c7 100644 --- a/docs/MSC1711_certificates_FAQ.md +++ b/docs/MSC1711_certificates_FAQ.md @@ -147,7 +147,7 @@ your domain, you can simply route all traffic through the reverse proxy by updating the SRV record appropriately (or removing it, if the proxy listens on 8448). -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. #### Option 3: add a .well-known file to delegate your matrix traffic @@ -319,7 +319,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will find it easier to direct federation traffic to a reverse proxy and manage their own TLS certificates, and this is a supported configuration. -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. ### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy? diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000..3c6ea48c66 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,7 @@ +# Synapse Documentation + +This directory contains documentation specific to the `synapse` homeserver. 
+ +All matrix-generic documentation now lives in its own project, located at [matrix-org/matrix-doc](https://github.com/matrix-org/matrix-doc) + +(Note: some items here may be moved to [matrix-org/matrix-doc](https://github.com/matrix-org/matrix-doc) at some point in the future.) diff --git a/docs/README.rst b/docs/README.rst deleted file mode 100644 index 3012da8b19..0000000000 --- a/docs/README.rst +++ /dev/null @@ -1,6 +0,0 @@ -All matrix-generic documentation now lives in its own project at - -github.com/matrix-org/matrix-doc.git - -Only Synapse implementation-specific documentation lives here now -(together with some older stuff will be shortly migrated over to matrix-doc) diff --git a/docs/ancient_architecture_notes.md b/docs/ancient_architecture_notes.md new file mode 100644 index 0000000000..3ea8976cc7 --- /dev/null +++ b/docs/ancient_architecture_notes.md @@ -0,0 +1,81 @@ +> **Warning** +> These architecture notes are spectacularly old, and date back +> to when Synapse was just federation code in isolation. This should be +> merged into the main spec. + +# Server to Server + +## Server to Server Stack + +To use the server to server stack, home servers should only need to +interact with the Messaging layer. + +The server to server side of things is designed into 4 distinct layers: + +1. Messaging Layer +2. Pdu Layer +3. Transaction Layer +4. Transport Layer + +Where the bottom (the transport layer) is what talks to the internet via +HTTP, and the top (the messaging layer) talks to the rest of the Home +Server with a domain specific API. + +1. **Messaging Layer** + + This is what the rest of the Home Server hits to send messages, join rooms, + etc. It also allows you to register callbacks for when it get's notified by + lower levels that e.g. a new message has been received. + + It is responsible for serializing requests to send to the data + layer, and to parse requests received from the data layer. + +2. **PDU Layer** + + This layer handles: + + - duplicate `pdu_id`'s - i.e., it makes sure we ignore them. + - responding to requests for a given `pdu_id` + - responding to requests for all metadata for a given context (i.e. room) + - handling incoming backfill requests + + So it has to parse incoming messages to discover which are metadata and + which aren't, and has to correctly clobber existing metadata where + appropriate. + + For incoming PDUs, it has to check the PDUs it references to see + if we have missed any. If we have go and ask someone (another + home server) for it. + +3. **Transaction Layer** + + This layer makes incoming requests idempotent. i.e., it stores + which transaction id's we have seen and what our response were. + If we have already seen a message with the given transaction id, + we do not notify higher levels but simply respond with the + previous response. + + `transaction_id` is from "`GET /send//`" + + It's also responsible for batching PDUs into single transaction for + sending to remote destinations, so that we only ever have one + transaction in flight to a given destination at any one time. + + This is also responsible for answering requests for things after a + given set of transactions, i.e., ask for everything after 'ver' X. + +4. **Transport Layer** + + This is responsible for starting a HTTP server and hitting the + correct callbacks on the Transaction layer, as well as sending + both data and requests for data. + +## Persistence + +We persist things in a single sqlite3 database. All database queries get +run on a separate, dedicated thread. 
This that we only ever have one +query running at a time, making it a lot easier to do things in a safe +manner. + +The queries are located in the `synapse.persistence.transactions` module, +and the table information in the `synapse.persistence.tables` module. diff --git a/docs/ancient_architecture_notes.rst b/docs/ancient_architecture_notes.rst deleted file mode 100644 index 2a5a2613c4..0000000000 --- a/docs/ancient_architecture_notes.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. WARNING:: - These architecture notes are spectacularly old, and date back to when Synapse - was just federation code in isolation. This should be merged into the main - spec. - - -= Server to Server = - -== Server to Server Stack == - -To use the server to server stack, home servers should only need to interact with the Messaging layer. - -The server to server side of things is designed into 4 distinct layers: - - 1. Messaging Layer - 2. Pdu Layer - 3. Transaction Layer - 4. Transport Layer - -Where the bottom (the transport layer) is what talks to the internet via HTTP, and the top (the messaging layer) talks to the rest of the Home Server with a domain specific API. - -1. Messaging Layer - This is what the rest of the Home Server hits to send messages, join rooms, etc. It also allows you to register callbacks for when it get's notified by lower levels that e.g. a new message has been received. - - It is responsible for serializing requests to send to the data layer, and to parse requests received from the data layer. - - -2. PDU Layer - This layer handles: - * duplicate pdu_id's - i.e., it makes sure we ignore them. - * responding to requests for a given pdu_id - * responding to requests for all metadata for a given context (i.e. room) - * handling incoming backfill requests - - So it has to parse incoming messages to discover which are metadata and which aren't, and has to correctly clobber existing metadata where appropriate. - - For incoming PDUs, it has to check the PDUs it references to see if we have missed any. If we have go and ask someone (another home server) for it. - - -3. Transaction Layer - This layer makes incoming requests idempotent. I.e., it stores which transaction id's we have seen and what our response were. If we have already seen a message with the given transaction id, we do not notify higher levels but simply respond with the previous response. - -transaction_id is from "GET /send//" - - It's also responsible for batching PDUs into single transaction for sending to remote destinations, so that we only ever have one transaction in flight to a given destination at any one time. - - This is also responsible for answering requests for things after a given set of transactions, i.e., ask for everything after 'ver' X. - - -4. Transport Layer - This is responsible for starting a HTTP server and hitting the correct callbacks on the Transaction layer, as well as sending both data and requests for data. - - -== Persistence == - -We persist things in a single sqlite3 database. All database queries get run on a separate, dedicated thread. This that we only ever have one query running at a time, making it a lot easier to do things in a safe manner. - -The queries are located in the synapse.persistence.transactions module, and the table information in the synapse.persistence.tables module. 
- diff --git a/docs/application_services.md b/docs/application_services.md new file mode 100644 index 0000000000..06cb79f1f9 --- /dev/null +++ b/docs/application_services.md @@ -0,0 +1,31 @@ +# Registering an Application Service + +The registration of new application services depends on the homeserver used. +In synapse, you need to create a new configuration file for your AS and add it +to the list specified under the `app_service_config_files` config +option in your synapse config. + +For example: + +```yaml +app_service_config_files: +- /home/matrix/.synapse/.yaml +``` + +The format of the AS configuration file is as follows: + +```yaml +url: +as_token: +hs_token: +sender_localpart: +namespaces: + users: # List of users we're interested in + - exclusive: + regex: + - ... + aliases: [] # List of aliases we're interested in + rooms: [] # List of room ids we're interested in +``` + +See the [spec](https://matrix.org/docs/spec/application_service/unstable.html) for further details on how application services work. diff --git a/docs/application_services.rst b/docs/application_services.rst deleted file mode 100644 index fbc0c7e960..0000000000 --- a/docs/application_services.rst +++ /dev/null @@ -1,35 +0,0 @@ -Registering an Application Service -================================== - -The registration of new application services depends on the homeserver used. -In synapse, you need to create a new configuration file for your AS and add it -to the list specified under the ``app_service_config_files`` config -option in your synapse config. - -For example: - -.. code-block:: yaml - - app_service_config_files: - - /home/matrix/.synapse/.yaml - - -The format of the AS configuration file is as follows: - -.. code-block:: yaml - - url: - as_token: - hs_token: - sender_localpart: - namespaces: - users: # List of users we're interested in - - exclusive: - regex: - - ... - aliases: [] # List of aliases we're interested in - rooms: [] # List of room ids we're interested in - -See the spec_ for further details on how application services work. - -.. _spec: https://matrix.org/docs/spec/application_service/unstable.html diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000000..0c7f315f3f --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,65 @@ +# Synapse Architecture + +As of the end of Oct 2014, Synapse's overall architecture looks like: + + synapse + .-----------------------------------------------------. + | Notifier | + | ^ | | + | | | | + | .------------|------. | + | | handlers/ | | | + | | v | | + | | Event*Handler <--------> rest/* <=> Client + | | Rooms*Handler | | + HS <=> federation/* <==> FederationHandler | | + | | | PresenceHandler | | + | | | TypingHandler | | + | | '-------------------' | + | | | | | + | | state/* | | + | | | | | + | | v v | + | `--------------> storage/* | + | | | + '--------------------------|--------------------------' + v + .----. + | DB | + '----' + +- Handlers: business logic of synapse itself. Follows a set contract of BaseHandler: + - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic): + - handle_state(event) + - auth(event) + - persist_event(event) + - notify notifier or federation(event) + - PresenceHandler: use distributor to get EDUs out of Federation. + Very lightweight logic built on the distributor + - TypingHandler: use distributor to get EDUs out of Federation. + Very lightweight logic built on the distributor + - EventsHandler: handles the events stream... 
+ - FederationHandler: - gets PDU from Federation Layer; turns into + an event; follows basehandler functionality. + - RoomsHandler: does all the room logic, including members - lots + of classes in RoomsHandler. + - ProfileHandler: talks to the storage to store/retrieve profile + info. +- EventFactory: generates events of particular event types. +- Notifier: Backs the events handler +- REST: Interfaces handlers and events to the outside world via + HTTP/JSON. Converts events back and forth from JSON. +- Federation: holds the HTTP client & server to talk to other servers. + Does replication to make sure there's nothing missing in the graph. + Handles reliability. Handles txns. +- Distributor: generic event bus. used for presence & typing only + currently. Notifier could be implemented using Distributor - so far + we are only using for things which actually /require/ dynamic + pluggability however as it can obfuscate the actual flow of control. +- Auth: helper singleton to say whether a given event is allowed to do + a given thing (TODO: put this on the diagram) +- State: helper singleton: does state conflict resolution. You give it + an event and it tells you if it actually updates the state or not, + and annotates the event up properly and handles merge conflict + resolution. +- Storage: abstracts the storage engine. diff --git a/docs/architecture.rst b/docs/architecture.rst deleted file mode 100644 index 98050428b9..0000000000 --- a/docs/architecture.rst +++ /dev/null @@ -1,68 +0,0 @@ -Synapse Architecture -==================== - -As of the end of Oct 2014, Synapse's overall architecture looks like:: - - synapse - .-----------------------------------------------------. - | Notifier | - | ^ | | - | | | | - | .------------|------. | - | | handlers/ | | | - | | v | | - | | Event*Handler <--------> rest/* <=> Client - | | Rooms*Handler | | - HSes <=> federation/* <==> FederationHandler | | - | | | PresenceHandler | | - | | | TypingHandler | | - | | '-------------------' | - | | | | | - | | state/* | | - | | | | | - | | v v | - | `--------------> storage/* | - | | | - '--------------------------|--------------------------' - v - .----. - | DB | - '----' - -* Handlers: business logic of synapse itself. Follows a set contract of BaseHandler: - - - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic): - - + handle_state(event) - + auth(event) - + persist_event(event) - + notify notifier or federation(event) - - - PresenceHandler: use distributor to get EDUs out of Federation. Very - lightweight logic built on the distributor - - TypingHandler: use distributor to get EDUs out of Federation. Very - lightweight logic built on the distributor - - EventsHandler: handles the events stream... - - FederationHandler: - gets PDU from Federation Layer; turns into an event; - follows basehandler functionality. - - RoomsHandler: does all the room logic, including members - lots of classes in - RoomsHandler. - - ProfileHandler: talks to the storage to store/retrieve profile info. - -* EventFactory: generates events of particular event types. -* Notifier: Backs the events handler -* REST: Interfaces handlers and events to the outside world via HTTP/JSON. - Converts events back and forth from JSON. -* Federation: holds the HTTP client & server to talk to other servers. Does - replication to make sure there's nothing missing in the graph. Handles - reliability. Handles txns. -* Distributor: generic event bus. used for presence & typing only currently. 
- Notifier could be implemented using Distributor - so far we are only using for - things which actually /require/ dynamic pluggability however as it can - obfuscate the actual flow of control. -* Auth: helper singleton to say whether a given event is allowed to do a given - thing (TODO: put this on the diagram) -* State: helper singleton: does state conflict resolution. You give it an event - and it tells you if it actually updates the state or not, and annotates the - event up properly and handles merge conflict resolution. -* Storage: abstracts the storage engine. diff --git a/docs/code_style.md b/docs/code_style.md new file mode 100644 index 0000000000..f983f72d6c --- /dev/null +++ b/docs/code_style.md @@ -0,0 +1,169 @@ +# Code Style + +## Formatting tools + +The Synapse codebase uses a number of code formatting tools in order to +quickly and automatically check for formatting (and sometimes logical) +errors in code. + +The necessary tools are detailed below. + +- **black** + + The Synapse codebase uses [black](https://pypi.org/project/black/) + as an opinionated code formatter, ensuring all comitted code is + properly formatted. + + First install `black` with: + + pip install --upgrade black + + Have `black` auto-format your code (it shouldn't change any + functionality) with: + + black . --exclude="\.tox|build|env" + +- **flake8** + + `flake8` is a code checking tool. We require code to pass `flake8` + before being merged into the codebase. + + Install `flake8` with: + + pip install --upgrade flake8 + + Check all application and test code with: + + flake8 synapse tests + +- **isort** + + `isort` ensures imports are nicely formatted, and can suggest and + auto-fix issues such as double-importing. + + Install `isort` with: + + pip install --upgrade isort + + Auto-fix imports with: + + isort -rc synapse tests + + `-rc` means to recursively search the given directories. + +It's worth noting that modern IDEs and text editors can run these tools +automatically on save. It may be worth looking into whether this +functionality is supported in your editor for a more convenient +development workflow. It is not, however, recommended to run `flake8` on +save as it takes a while and is very resource intensive. + +## General rules + +- **Naming**: + - Use camel case for class and type names + - Use underscores for functions and variables. +- **Docstrings**: should follow the [google code + style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings). + This is so that we can generate documentation with + [sphinx](http://sphinxcontrib-napoleon.readthedocs.org/en/latest/). + See the + [examples](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) + in the sphinx documentation. +- **Imports**: + - Imports should be sorted by `isort` as described above. + - Prefer to import classes and functions rather than packages or + modules. + + Example: + + from synapse.types import UserID + ... + user_id = UserID(local, server) + + is preferred over: + + from synapse import types + ... + user_id = types.UserID(local, server) + + (or any other variant). + + This goes against the advice in the Google style guide, but it + means that errors in the name are caught early (at import time). + + - Avoid wildcard imports (`from synapse.types import *`) and + relative imports (`from .types import UserID`). + +## Configuration file format + +The [sample configuration file](./sample_config.yaml) acts as a +reference to Synapse's configuration options for server administrators. 
+Remember that many readers will be unfamiliar with YAML and server +administration in general, so that it is important that the file be as +easy to understand as possible, which includes following a consistent +format. + +Some guidelines follow: + +- Sections should be separated with a heading consisting of a single + line prefixed and suffixed with `##`. There should be **two** blank + lines before the section header, and **one** after. +- Each option should be listed in the file with the following format: + - A comment describing the setting. Each line of this comment + should be prefixed with a hash (`#`) and a space. + + The comment should describe the default behaviour (ie, what + happens if the setting is omitted), as well as what the effect + will be if the setting is changed. + + Often, the comment end with something like "uncomment the + following to ". + + - A line consisting of only `#`. + - A commented-out example setting, prefixed with only `#`. + + For boolean (on/off) options, convention is that this example + should be the *opposite* to the default (so the comment will end + with "Uncomment the following to enable [or disable] + ." For other options, the example should give some + non-default value which is likely to be useful to the reader. + +- There should be a blank line between each option. +- Where several settings are grouped into a single dict, *avoid* the + convention where the whole block is commented out, resulting in + comment lines starting `# #`, as this is hard to read and confusing + to edit. Instead, leave the top-level config option uncommented, and + follow the conventions above for sub-options. Ensure that your code + correctly handles the top-level option being set to `None` (as it + will be if no sub-options are enabled). +- Lines should be wrapped at 80 characters. + +Example: + + ## Frobnication ## + + # The frobnicator will ensure that all requests are fully frobnicated. + # To enable it, uncomment the following. + # + #frobnicator_enabled: true + + # By default, the frobnicator will frobnicate with the default frobber. + # The following will make it use an alternative frobber. + # + #frobincator_frobber: special_frobber + + # Settings for the frobber + # + frobber: + # frobbing speed. Defaults to 1. + # + #speed: 10 + + # frobbing distance. Defaults to 1000. + # + #distance: 100 + +Note that the sample configuration is generated from the synapse code +and is maintained by a script, `scripts-dev/generate_sample_config`. +Making sure that the output from this script matches the desired format +is left as an exercise for the reader! diff --git a/docs/code_style.rst b/docs/code_style.rst deleted file mode 100644 index 39ac4ebedc..0000000000 --- a/docs/code_style.rst +++ /dev/null @@ -1,180 +0,0 @@ -Code Style -========== - -Formatting tools ----------------- - -The Synapse codebase uses a number of code formatting tools in order to -quickly and automatically check for formatting (and sometimes logical) errors -in code. - -The necessary tools are detailed below. - -- **black** - - The Synapse codebase uses `black `_ as an - opinionated code formatter, ensuring all comitted code is properly - formatted. - - First install ``black`` with:: - - pip install --upgrade black - - Have ``black`` auto-format your code (it shouldn't change any functionality) - with:: - - black . --exclude="\.tox|build|env" - -- **flake8** - - ``flake8`` is a code checking tool. We require code to pass ``flake8`` before being merged into the codebase. 
diff --git a/docs/code_style.rst b/docs/code_style.rst deleted file mode 100644 index 39ac4ebedc..0000000000 --- a/docs/code_style.rst +++ /dev/null @@ -1,180 +0,0 @@ -Code Style -========== - -Formatting tools ---------------- - -The Synapse codebase uses a number of code formatting tools in order to -quickly and automatically check for formatting (and sometimes logical) errors -in code. - -The necessary tools are detailed below. - -- **black** - - The Synapse codebase uses `black `_ as an - opinionated code formatter, ensuring all comitted code is properly - formatted. - - First install ``black`` with:: - - pip install --upgrade black - - Have ``black`` auto-format your code (it shouldn't change any functionality) - with:: - - black . --exclude="\.tox|build|env" - -- **flake8** - - ``flake8`` is a code checking tool. We require code to pass ``flake8`` before being merged into the codebase. - - Install ``flake8`` with:: - - pip install --upgrade flake8 - - Check all application and test code with:: - - flake8 synapse tests - -- **isort** - - ``isort`` ensures imports are nicely formatted, and can suggest and - auto-fix issues such as double-importing. - - Install ``isort`` with:: - - pip install --upgrade isort - - Auto-fix imports with:: - - isort -rc synapse tests - - ``-rc`` means to recursively search the given directories. - -It's worth noting that modern IDEs and text editors can run these tools -automatically on save. It may be worth looking into whether this -functionality is supported in your editor for a more convenient development -workflow. It is not, however, recommended to run ``flake8`` on save as it -takes a while and is very resource intensive. - -General rules ------------- - -- **Naming**: - - - Use camel case for class and type names - - Use underscores for functions and variables. - -- **Docstrings**: should follow the `google code style - `_. - This is so that we can generate documentation with `sphinx - `_. See the - `examples - `_ - in the sphinx documentation. - -- **Imports**: - - - Imports should be sorted by ``isort`` as described above. - - - Prefer to import classes and functions rather than packages or modules. - - Example:: - - from synapse.types import UserID - ... - user_id = UserID(local, server) - - is preferred over:: - - from synapse import types - ... - user_id = types.UserID(local, server) - - (or any other variant). - - This goes against the advice in the Google style guide, but it means that - errors in the name are caught early (at import time). - - - Avoid wildcard imports (``from synapse.types import *``) and relative - imports (``from .types import UserID``). - -Configuration file format ------------------------- - -The `sample configuration file <./sample_config.yaml>`_ acts as a reference to -Synapse's configuration options for server administrators. Remember that many -readers will be unfamiliar with YAML and server administration in general, so -that it is important that the file be as easy to understand as possible, which -includes following a consistent format. - -Some guidelines follow: - -* Sections should be separated with a heading consisting of a single line - prefixed and suffixed with ``##``. There should be **two** blank lines - before the section header, and **one** after. - -* Each option should be listed in the file with the following format: - - * A comment describing the setting. Each line of this comment should be - prefixed with a hash (``#``) and a space. - - The comment should describe the default behaviour (ie, what happens if - the setting is omitted), as well as what the effect will be if the - setting is changed. - - Often, the comment end with something like "uncomment the - following to \". - - * A line consisting of only ``#``. - - * A commented-out example setting, prefixed with only ``#``. - - For boolean (on/off) options, convention is that this example should be - the *opposite* to the default (so the comment will end with "Uncomment - the following to enable [or disable] \." For other options, - the example should give some non-default value which is likely to be - useful to the reader. - -* There should be a blank line between each option. - -* Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in comment - lines starting ``# #``, as this is hard to read and confusing to - edit.
Instead, leave the top-level config option uncommented, and follow - the conventions above for sub-options. Ensure that your code correctly - handles the top-level option being set to ``None`` (as it will be if no - sub-options are enabled). - -* Lines should be wrapped at 80 characters. - -Example:: - - ## Frobnication ## - - # The frobnicator will ensure that all requests are fully frobnicated. - # To enable it, uncomment the following. - # - #frobnicator_enabled: true - - # By default, the frobnicator will frobnicate with the default frobber. - # The following will make it use an alternative frobber. - # - #frobincator_frobber: special_frobber - - # Settings for the frobber - # - frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 - -Note that the sample configuration is generated from the synapse code and is -maintained by a script, ``scripts-dev/generate_sample_config``. Making sure -that the output from this script matches the desired format is left as an -exercise for the reader! diff --git a/docs/federate.md b/docs/federate.md index 6d6bb85e15..193e2d2dfe 100644 --- a/docs/federate.md +++ b/docs/federate.md @@ -148,7 +148,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will find it easier to direct federation traffic to a reverse proxy and manage their own TLS certificates, and this is a supported configuration. -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. #### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy? @@ -184,7 +184,7 @@ a complicated dance which requires connections in both directions). Another common problem is that people on other servers can't join rooms that you invite them to. This can be caused by an incorrectly-configured reverse -proxy: see [reverse_proxy.rst]() for instructions on how to correctly +proxy: see [reverse_proxy.md](reverse_proxy.md) for instructions on how to correctly configure a reverse proxy. ## Running a Demo Federation of Synapses diff --git a/docs/log_contexts.md b/docs/log_contexts.md new file mode 100644 index 0000000000..5331e8c88b --- /dev/null +++ b/docs/log_contexts.md @@ -0,0 +1,494 @@ +# Log Contexts +
+To help track the processing of individual requests, synapse uses a
+'`log context`' to track which request it is handling at any given
+moment. This is done via a thread-local variable; a `logging.Filter` is
+then used to fish the information back out of the thread-local variable
+and add it to each log record.
+
+Logcontexts are also used for CPU and database accounting, so that we
+can track which requests were responsible for high CPU use or database
+activity.
+
+The `synapse.logging.context` module provides facilities for managing
+the current log context (as well as providing the `LoggingContextFilter`
+class).
+
+Deferreds make the whole thing complicated, so this document describes
+how it all works, and how to write code which follows the rules.
+
+## Logcontexts without Deferreds
+
+In the absence of any Deferred voodoo, things are simple enough.
As with
+any code of this nature, the rule is that our function should leave
+things as it found them:
+
+```python
+from synapse.logging import context # omitted from future snippets
+
+def handle_request(request_id):
+ request_context = context.LoggingContext()
+
+ calling_context = context.LoggingContext.current_context()
+ context.LoggingContext.set_current_context(request_context)
+ try:
+ request_context.request = request_id
+ do_request_handling()
+ logger.debug("finished")
+ finally:
+ context.LoggingContext.set_current_context(calling_context)
+
+def do_request_handling():
+ logger.debug("phew") # this will be logged against request_id
+```
+
+LoggingContext implements the context management methods, so the above
+can be written much more succinctly as:
+
+```python
+def handle_request(request_id):
+ with context.LoggingContext() as request_context:
+ request_context.request = request_id
+ do_request_handling()
+ logger.debug("finished")
+
+def do_request_handling():
+ logger.debug("phew")
+```
+
+## Using logcontexts with Deferreds
+
+Deferreds --- and in particular, `defer.inlineCallbacks` --- break the
+linear flow of code so that there is no longer a single entry point
+where we should set the logcontext and a single exit point where we
+should remove it.
+
+Consider the example above, where `do_request_handling` needs to do some
+blocking operation, and returns a deferred:
+
+```python
+@defer.inlineCallbacks
+def handle_request(request_id):
+ with context.LoggingContext() as request_context:
+ request_context.request = request_id
+ yield do_request_handling()
+ logger.debug("finished")
+```
+
+In the above flow:
+
+- The logcontext is set
+- `do_request_handling` is called, and returns a deferred
+- `handle_request` yields the deferred
+- The `inlineCallbacks` wrapper of `handle_request` returns a deferred
+
+So we have stopped processing the request (and will probably go on to
+start processing the next), without clearing the logcontext.
+
+To circumvent this problem, synapse code assumes that, wherever you have
+a deferred, you will want to yield on it. To that end, wherever
+functions return a deferred, we adopt the following conventions:
+
+**Rules for functions returning deferreds:**
+
+> - If the deferred is already complete, the function returns with the
+> same logcontext it started with.
+> - If the deferred is incomplete, the function clears the logcontext
+> before returning; when the deferred completes, it restores the
+> logcontext before running any callbacks.
+
+That sounds complicated, but actually it means a lot of code (including
+the example above) "just works". There are two cases:
+
+- If `do_request_handling` returns a completed deferred, then the
+ logcontext will still be in place. In this case, execution will
+ continue immediately after the `yield`; the "finished" line will
+ be logged against the right context, and the `with` block restores
+ the original context before we return to the caller.
+- If the returned deferred is incomplete, `do_request_handling` clears
+ the logcontext before returning. The logcontext is therefore clear
+ when `handle_request` yields the deferred. At that point, the
+ `inlineCallbacks` wrapper adds a callback to the deferred, and
+ returns another (incomplete) deferred to the caller, and it is safe
+ to begin processing the next request.
+
+ Once `do_request_handling`'s deferred completes, it will reinstate
+ the logcontext, before running the callback added by the
+ `inlineCallbacks` wrapper. That callback runs the second half of
+ `handle_request`, so again the "finished" line will be logged
+ against the right context, and the `with` block restores the
+ original context.
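
To make these rules concrete, here is a minimal, hand-rolled sketch of a
rules-compliant function which returns an incomplete deferred. This is
illustrative code under stated assumptions, not a real Synapse helper: it
assumes the `context` import from the first snippet, and assumes
`LoggingContext.sentinel` as the "empty" logcontext. In practice you would
use the helpers described in the following sections
(`PreserveLoggingContext` and `make_deferred_yieldable`) rather than doing
this by hand:

```python
from twisted.internet import defer, reactor

def rules_compliant_sleep(seconds):
    # a deferred which completes after `seconds`; it is always
    # incomplete when we return it, so the second rule applies
    d = defer.Deferred()
    reactor.callLater(seconds, d.callback, None)

    # remember the logcontext we were called in
    calling_context = context.LoggingContext.current_context()

    def restore_context(result):
        # when the deferred completes, restore the caller's logcontext
        # before any of its callbacks run
        context.LoggingContext.set_current_context(calling_context)
        return result

    d.addBoth(restore_context)

    # the deferred is incomplete, so clear the logcontext before
    # returning control to the caller (and hence to the reactor)
    context.LoggingContext.set_current_context(context.LoggingContext.sentinel)
    return d
```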
As an aside, it's worth noting that `handle_request` follows our rules,
+though that only matters if the caller has its own logcontext which it
+cares about.
+
+The following sections describe pitfalls and helpful patterns when
+implementing these rules.
+
+## Always yield your deferreds
+
+Whenever you get a deferred back from a function, you should `yield` on
+it as soon as possible. (Returning it directly to your caller is ok too,
+if you're not doing `inlineCallbacks`.) Do not pass go; do not do any
+logging; do not call any other functions.
+
+```python
+@defer.inlineCallbacks
+def fun():
+ logger.debug("starting")
+ yield do_some_stuff() # just like this
+
+ d = more_stuff()
+ result = yield d # also fine, of course
+
+ return result
+
+def nonInlineCallbacksFun():
+ logger.debug("just a wrapper really")
+ return do_some_stuff() # this is ok too - the caller will yield on
+ # it anyway.
+```
+
+Provided this pattern is followed all the way back up the callchain
+to where the logcontext was set, this will make things work out ok:
+provided `do_some_stuff` and `more_stuff` follow the rules above, then
+so will `fun` (as wrapped by `inlineCallbacks`) and
+`nonInlineCallbacksFun`.
+
+It's all too easy to forget to `yield`: for instance if we forgot that
+`do_some_stuff` returned a deferred, we might plough on regardless. This
+leads to a mess; it will probably work itself out eventually, but not
+before a load of stuff has been logged against the wrong context.
+(Normally, other things will break, more obviously, if you forget to
+`yield`, so this tends not to be a major problem in practice.)
+
+Of course sometimes you need to do something a bit fancier with your
+Deferreds - not all code follows the linear A-then-B-then-C pattern.
+Notes on implementing more complex patterns are in later sections.
+
+## Where you create a new Deferred, make it follow the rules
+
+Most of the time, a Deferred comes from another synapse function.
+Sometimes, though, we need to make up a new Deferred, or we get a
+Deferred back from external code. We need to make it follow our rules.
+
+The easy way to do it is with a combination of `defer.inlineCallbacks`,
+and `context.PreserveLoggingContext`. Suppose we want to implement
+`sleep`, which returns a deferred which will run its callbacks after a
+given number of seconds. That might look like:
+
+```python
+# not a logcontext-rules-compliant function
+def get_sleep_deferred(seconds):
+ d = defer.Deferred()
+ reactor.callLater(seconds, d.callback, None)
+ return d
+```
+
+That doesn't follow the rules, but we can fix it by wrapping it with
+`PreserveLoggingContext` and `yield`ing on it:
+
+```python
+@defer.inlineCallbacks
+def sleep(seconds):
+ with PreserveLoggingContext():
+ yield get_sleep_deferred(seconds)
+```
+
+This technique works equally well for external functions which return
+deferreds, or deferreds we have made ourselves.
+
+You can also use `context.make_deferred_yieldable`, which just does the
+boilerplate for you, so the above could be written:
+
+```python
+def sleep(seconds):
+ return context.make_deferred_yieldable(get_sleep_deferred(seconds))
+```
+
+## Fire-and-forget
+
+Sometimes you want to fire off a chain of execution, but not wait for
+its result.
That might look a bit like this:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+ yield foreground_operation()
+
+ # *don't* do this
+ background_operation()
+
+ logger.debug("Request handling complete")
+
+@defer.inlineCallbacks
+def background_operation():
+ yield first_background_step()
+ logger.debug("Completed first step")
+ yield second_background_step()
+ logger.debug("Completed second step")
+```
+
+The above code does a couple of steps in the background after
+`do_request_handling` has finished. The log lines are still logged
+against the `request_context` logcontext, which may or may not be
+desirable. There are two big problems with the above, however. The first
+problem is that, if `background_operation` returns an incomplete
+Deferred, it will expect its caller to `yield` immediately, so will have
+cleared the logcontext. In this example, that means that 'Request
+handling complete' will be logged without any context.
+
+The second problem, which is potentially even worse, is that when the
+Deferred returned by `background_operation` completes, it will restore
+the original logcontext. There is nothing waiting on that Deferred, so
+the logcontext will leak into the reactor and possibly get attached to
+some arbitrary future operation.
+
+There are two potential solutions to this.
+
+One option is to surround the call to `background_operation` with a
+`PreserveLoggingContext` call. That will reset the logcontext before
+starting `background_operation` (so the context restored when the
+deferred completes will be the empty logcontext), and will restore the
+current logcontext before continuing the foreground process:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+ yield foreground_operation()
+
+ # start background_operation off in the empty logcontext, to
+ # avoid leaking the current context into the reactor.
+ with PreserveLoggingContext():
+ background_operation()
+
+ # this will now be logged against the request context
+ logger.debug("Request handling complete")
+```
+
+Obviously that option means that the operations done in
+`background_operation` would not be logged against a logcontext
+(though that might be fixed by setting a different logcontext via a
+`with LoggingContext(...)` in `background_operation`).
+
+The second option is to use `context.run_in_background`, which wraps a
+function so that it doesn't reset the logcontext even when it returns
+an incomplete deferred, and adds a callback to the returned deferred to
+reset the logcontext. In other words, it turns a function that follows
+the Synapse rules about logcontexts and Deferreds into one which behaves
+more like an external function --- the opposite operation to that
+described in the previous section. It can be used like this:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+ yield foreground_operation()
+
+ context.run_in_background(background_operation)
+
+ # this will now be logged against the request context
+ logger.debug("Request handling complete")
+```
+
+## Passing synapse deferreds into third-party functions
+
+A typical example of this is where we want to collect together two or
+more deferreds via `defer.gatherResults`:
+
+```python
+d1 = operation1()
+d2 = operation2()
+d3 = defer.gatherResults([d1, d2])
+```
+
+This is really a variation of the fire-and-forget problem above, in that
+we are firing off `d1` and `d2` without yielding on them. The difference
+is that we now have third-party code attached to their callbacks.
Anyway,
+either technique given in the [Fire-and-forget](#fire-and-forget)
+section will work.
+
+Of course, the new Deferred returned by `gatherResults` needs to be
+wrapped in order to make it follow the logcontext rules before we can
+yield it, as described in [Where you create a new Deferred, make it
+follow the
+rules](#where-you-create-a-new-deferred-make-it-follow-the-rules).
+
+So, option one: reset the logcontext before starting the operations to
+be gathered:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+ with PreserveLoggingContext():
+ d1 = operation1()
+ d2 = operation2()
+ result = yield defer.gatherResults([d1, d2])
+```
+
+In this case particularly, though, option two, of using
+`context.preserve_fn`, almost certainly makes more sense, so that
+`operation1` and `operation2` are both logged against the original
+logcontext. This looks like:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+ d1 = context.preserve_fn(operation1)()
+ d2 = context.preserve_fn(operation2)()
+
+ with PreserveLoggingContext():
+ result = yield defer.gatherResults([d1, d2])
+```
+
+## Was all this really necessary?
+
+The conventions used work fine for a linear flow where everything
+happens in series via `defer.inlineCallbacks` and `yield`, but are
+certainly tricky to follow for any more exotic flows. It's hard not to
+wonder if we could have done something else.
+
+We're not going to rewrite Synapse now, so the following is entirely of
+academic interest, but I'd like to record some thoughts on an
+alternative approach.
+
+I briefly prototyped some code following an alternative set of rules. I
+think it would work, but I certainly didn't get as far as thinking how
+it would interact with concepts as complicated as the cache descriptors.
+
+My alternative rules were:
+
+- functions always preserve the logcontext of their caller, whether or
+ not they are returning a Deferred.
+- Deferreds returned by synapse functions run their callbacks in the
+ same context as the function was originally called in.
+
+The main point of this scheme is that everywhere that sets the
+logcontext is responsible for clearing it before returning control to
+the reactor.
+
+So, for example, if you were the function which started a
+`with LoggingContext` block, you wouldn't `yield` within it --- instead
+you'd start off the background process, and then leave the `with` block
+to wait for it:
+
+```python
+def handle_request(request_id):
+ with context.LoggingContext() as request_context:
+ request_context.request = request_id
+ d = do_request_handling()
+
+ def cb(r):
+ logger.debug("finished")
+
+ d.addCallback(cb)
+ return d
+```
+
+(in general, mixing `with LoggingContext` blocks and
+`defer.inlineCallbacks` in the same function leads to slightly
+counter-intuitive code, under this scheme).
+
+Because we leave the original `with` block as soon as the Deferred is
+returned (as opposed to waiting for it to be resolved, as we do today),
+the logcontext is cleared before control passes back to the reactor; so
+if there is some code within `do_request_handling` which needs to wait
+for a Deferred to complete, there is no need for it to worry about
+clearing the logcontext before doing so:
+
+```python
+def handle_request():
+ r = do_some_stuff()
+ r.addCallback(do_some_more_stuff)
+ return r
+```
+
+--- and provided `do_some_stuff` follows the rules of returning a
+Deferred which runs its callbacks in the original logcontext, all is
+happy.
+
+The business of a Deferred which runs its callbacks in the original
+logcontext isn't hard to achieve --- we have it today, in the shape of
+`context._PreservingContextDeferred`:
+
+```python
+def do_some_stuff():
+ deferred = do_some_io()
+ pcd = _PreservingContextDeferred(LoggingContext.current_context())
+ deferred.chainDeferred(pcd)
+ return pcd
+```
+
+It turns out that, thanks to the way that Deferreds chain together, we
+automatically get the property of a context-preserving deferred with
+`defer.inlineCallbacks`, provided the final Deferred the function
+`yields` on has that property. So we can just write:
+
+```python
+@defer.inlineCallbacks
+def handle_request():
+ yield do_some_stuff()
+ yield do_some_more_stuff()
+```
+
+To conclude: I think this scheme would have worked equally well, with
+less danger of messing it up, and probably made some more esoteric code
+easier to write. But again --- changing the conventions of the entire
+Synapse codebase is not a sensible option for the marginal improvement
+offered.
+
+## A note on garbage-collection of Deferred chains
+
+It turns out that our logcontext rules do not play nicely with Deferred
+chains which get orphaned and garbage-collected.
+
+Imagine we have some code that looks like this:
+
+```python
+listener_queue = []
+
+def on_something_interesting():
+ for d in listener_queue:
+ d.callback("foo")
+
+@defer.inlineCallbacks
+def await_something_interesting():
+ new_deferred = defer.Deferred()
+ listener_queue.append(new_deferred)
+
+ with PreserveLoggingContext():
+ yield new_deferred
+```
+
+Obviously, the idea here is that we have a bunch of things which are
+waiting for an event. (It's just an example of the problem here, but a
+relatively common one.)
+
+Now let's imagine two further things happen. First of all, whatever was
+waiting for the interesting thing goes away. (Perhaps the request times
+out, or something *even more* interesting happens.)
+
+Secondly, let's suppose that we decide that the interesting thing is
+never going to happen, and we reset the listener queue:
+
+```python
+def reset_listener_queue():
+ listener_queue.clear()
+```
+
+So, both ends of the deferred chain have now dropped their references,
+and the deferred chain is now orphaned, and will be garbage-collected at
+some point. Note that `await_something_interesting` is a generator
+function, and when Python garbage-collects generator functions, it gives
+them a chance to clean up by making the `yield` raise a `GeneratorExit`
+exception. In our case, that means that the `__exit__` handler of
+`PreserveLoggingContext` will carefully restore the request context, but
+there is now nothing waiting for its return, so the request context is
+never cleared.
+
+To reiterate, this problem only arises when *both* ends of a deferred
+chain are dropped. Dropping the reference to a deferred you're
+supposed to be calling is probably bad practice, so this doesn't
+actually happen too much. Unfortunately, when it does happen, it will
+lead to leaked logcontexts which are incredibly hard to track down. diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst deleted file mode 100644 index 4502cd9454..0000000000 --- a/docs/log_contexts.rst +++ /dev/null @@ -1,498 +0,0 @@ -Log Contexts -============ - -.. contents:: - -To help track the processing of individual requests, synapse uses a -'log context' to track which request it is handling at any given moment.
This -is done via a thread-local variable; a ``logging.Filter`` is then used to fish -the information back out of the thread-local variable and add it to each log -record. - -Logcontexts are also used for CPU and database accounting, so that we can track -which requests were responsible for high CPU use or database activity. - -The ``synapse.logging.context`` module provides a facilities for managing the -current log context (as well as providing the ``LoggingContextFilter`` class). - -Deferreds make the whole thing complicated, so this document describes how it -all works, and how to write code which follows the rules. - -Logcontexts without Deferreds ------------------------------ - -In the absence of any Deferred voodoo, things are simple enough. As with any -code of this nature, the rule is that our function should leave things as it -found them: - -.. code:: python - - from synapse.logging import context # omitted from future snippets - - def handle_request(request_id): - request_context = context.LoggingContext() - - calling_context = context.LoggingContext.current_context() - context.LoggingContext.set_current_context(request_context) - try: - request_context.request = request_id - do_request_handling() - logger.debug("finished") - finally: - context.LoggingContext.set_current_context(calling_context) - - def do_request_handling(): - logger.debug("phew") # this will be logged against request_id - - -LoggingContext implements the context management methods, so the above can be -written much more succinctly as: - -.. code:: python - - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - do_request_handling() - logger.debug("finished") - - def do_request_handling(): - logger.debug("phew") - - -Using logcontexts with Deferreds --------------------------------- - -Deferreds — and in particular, ``defer.inlineCallbacks`` — break -the linear flow of code so that there is no longer a single entry point where -we should set the logcontext and a single exit point where we should remove it. - -Consider the example above, where ``do_request_handling`` needs to do some -blocking operation, and returns a deferred: - -.. code:: python - - @defer.inlineCallbacks - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - yield do_request_handling() - logger.debug("finished") - - -In the above flow: - -* The logcontext is set -* ``do_request_handling`` is called, and returns a deferred -* ``handle_request`` yields the deferred -* The ``inlineCallbacks`` wrapper of ``handle_request`` returns a deferred - -So we have stopped processing the request (and will probably go on to start -processing the next), without clearing the logcontext. - -To circumvent this problem, synapse code assumes that, wherever you have a -deferred, you will want to yield on it. To that end, whereever functions return -a deferred, we adopt the following conventions: - -**Rules for functions returning deferreds:** - - * If the deferred is already complete, the function returns with the same - logcontext it started with. - * If the deferred is incomplete, the function clears the logcontext before - returning; when the deferred completes, it restores the logcontext before - running any callbacks. - -That sounds complicated, but actually it means a lot of code (including the -example above) "just works". 
There are two cases: - -* If ``do_request_handling`` returns a completed deferred, then the logcontext - will still be in place. In this case, execution will continue immediately - after the ``yield``; the "finished" line will be logged against the right - context, and the ``with`` block restores the original context before we - return to the caller. - -* If the returned deferred is incomplete, ``do_request_handling`` clears the - logcontext before returning. The logcontext is therefore clear when - ``handle_request`` yields the deferred. At that point, the ``inlineCallbacks`` - wrapper adds a callback to the deferred, and returns another (incomplete) - deferred to the caller, and it is safe to begin processing the next request. - - Once ``do_request_handling``'s deferred completes, it will reinstate the - logcontext, before running the callback added by the ``inlineCallbacks`` - wrapper. That callback runs the second half of ``handle_request``, so again - the "finished" line will be logged against the right - context, and the ``with`` block restores the original context. - -As an aside, it's worth noting that ``handle_request`` follows our rules - -though that only matters if the caller has its own logcontext which it cares -about. - -The following sections describe pitfalls and helpful patterns when implementing -these rules. - -Always yield your deferreds ---------------------------- - -Whenever you get a deferred back from a function, you should ``yield`` on it -as soon as possible. (Returning it directly to your caller is ok too, if you're -not doing ``inlineCallbacks``.) Do not pass go; do not do any logging; do not -call any other functions. - -.. code:: python - - @defer.inlineCallbacks - def fun(): - logger.debug("starting") - yield do_some_stuff() # just like this - - d = more_stuff() - result = yield d # also fine, of course - - return result - - def nonInlineCallbacksFun(): - logger.debug("just a wrapper really") - return do_some_stuff() # this is ok too - the caller will yield on - # it anyway. - -Provided this pattern is followed all the way back up to the callchain to where -the logcontext was set, this will make things work out ok: provided -``do_some_stuff`` and ``more_stuff`` follow the rules above, then so will -``fun`` (as wrapped by ``inlineCallbacks``) and ``nonInlineCallbacksFun``. - -It's all too easy to forget to ``yield``: for instance if we forgot that -``do_some_stuff`` returned a deferred, we might plough on regardless. This -leads to a mess; it will probably work itself out eventually, but not before -a load of stuff has been logged against the wrong context. (Normally, other -things will break, more obviously, if you forget to ``yield``, so this tends -not to be a major problem in practice.) - -Of course sometimes you need to do something a bit fancier with your Deferreds -- not all code follows the linear A-then-B-then-C pattern. Notes on -implementing more complex patterns are in later sections. - -Where you create a new Deferred, make it follow the rules ---------------------------------------------------------- - -Most of the time, a Deferred comes from another synapse function. Sometimes, -though, we need to make up a new Deferred, or we get a Deferred back from -external code. We need to make it follow our rules. - -The easy way to do it is with a combination of ``defer.inlineCallbacks``, and -``context.PreserveLoggingContext``. 
Suppose we want to implement ``sleep``, -which returns a deferred which will run its callbacks after a given number of -seconds. That might look like: - -.. code:: python - - # not a logcontext-rules-compliant function - def get_sleep_deferred(seconds): - d = defer.Deferred() - reactor.callLater(seconds, d.callback, None) - return d - -That doesn't follow the rules, but we can fix it by wrapping it with -``PreserveLoggingContext`` and ``yield`` ing on it: - -.. code:: python - - @defer.inlineCallbacks - def sleep(seconds): - with PreserveLoggingContext(): - yield get_sleep_deferred(seconds) - -This technique works equally for external functions which return deferreds, -or deferreds we have made ourselves. - -You can also use ``context.make_deferred_yieldable``, which just does the -boilerplate for you, so the above could be written: - -.. code:: python - - def sleep(seconds): - return context.make_deferred_yieldable(get_sleep_deferred(seconds)) - - -Fire-and-forget ---------------- - -Sometimes you want to fire off a chain of execution, but not wait for its -result. That might look a bit like this: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - # *don't* do this - background_operation() - - logger.debug("Request handling complete") - - @defer.inlineCallbacks - def background_operation(): - yield first_background_step() - logger.debug("Completed first step") - yield second_background_step() - logger.debug("Completed second step") - -The above code does a couple of steps in the background after -``do_request_handling`` has finished. The log lines are still logged against -the ``request_context`` logcontext, which may or may not be desirable. There -are two big problems with the above, however. The first problem is that, if -``background_operation`` returns an incomplete Deferred, it will expect its -caller to ``yield`` immediately, so will have cleared the logcontext. In this -example, that means that 'Request handling complete' will be logged without any -context. - -The second problem, which is potentially even worse, is that when the Deferred -returned by ``background_operation`` completes, it will restore the original -logcontext. There is nothing waiting on that Deferred, so the logcontext will -leak into the reactor and possibly get attached to some arbitrary future -operation. - -There are two potential solutions to this. - -One option is to surround the call to ``background_operation`` with a -``PreserveLoggingContext`` call. That will reset the logcontext before -starting ``background_operation`` (so the context restored when the deferred -completes will be the empty logcontext), and will restore the current -logcontext before continuing the foreground process: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - # start background_operation off in the empty logcontext, to - # avoid leaking the current context into the reactor. - with PreserveLoggingContext(): - background_operation() - - # this will now be logged against the request context - logger.debug("Request handling complete") - -Obviously that option means that the operations done in -``background_operation`` would be not be logged against a logcontext (though -that might be fixed by setting a different logcontext via a ``with -LoggingContext(...)`` in ``background_operation``). 
- -The second option is to use ``context.run_in_background``, which wraps a -function so that it doesn't reset the logcontext even when it returns an -incomplete deferred, and adds a callback to the returned deferred to reset the -logcontext. In other words, it turns a function that follows the Synapse rules -about logcontexts and Deferreds into one which behaves more like an external -function — the opposite operation to that described in the previous section. -It can be used like this: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - context.run_in_background(background_operation) - - # this will now be logged against the request context - logger.debug("Request handling complete") - -Passing synapse deferreds into third-party functions ----------------------------------------------------- - -A typical example of this is where we want to collect together two or more -deferred via ``defer.gatherResults``: - -.. code:: python - - d1 = operation1() - d2 = operation2() - d3 = defer.gatherResults([d1, d2]) - -This is really a variation of the fire-and-forget problem above, in that we are -firing off ``d1`` and ``d2`` without yielding on them. The difference -is that we now have third-party code attached to their callbacks. Anyway either -technique given in the `Fire-and-forget`_ section will work. - -Of course, the new Deferred returned by ``gatherResults`` needs to be wrapped -in order to make it follow the logcontext rules before we can yield it, as -described in `Where you create a new Deferred, make it follow the rules`_. - -So, option one: reset the logcontext before starting the operations to be -gathered: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - with PreserveLoggingContext(): - d1 = operation1() - d2 = operation2() - result = yield defer.gatherResults([d1, d2]) - -In this case particularly, though, option two, of using -``context.preserve_fn`` almost certainly makes more sense, so that -``operation1`` and ``operation2`` are both logged against the original -logcontext. This looks like: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - d1 = context.preserve_fn(operation1)() - d2 = context.preserve_fn(operation2)() - - with PreserveLoggingContext(): - result = yield defer.gatherResults([d1, d2]) - - -Was all this really necessary? ------------------------------- - -The conventions used work fine for a linear flow where everything happens in -series via ``defer.inlineCallbacks`` and ``yield``, but are certainly tricky to -follow for any more exotic flows. It's hard not to wonder if we could have done -something else. - -We're not going to rewrite Synapse now, so the following is entirely of -academic interest, but I'd like to record some thoughts on an alternative -approach. - -I briefly prototyped some code following an alternative set of rules. I think -it would work, but I certainly didn't get as far as thinking how it would -interact with concepts as complicated as the cache descriptors. - -My alternative rules were: - -* functions always preserve the logcontext of their caller, whether or not they - are returning a Deferred. - -* Deferreds returned by synapse functions run their callbacks in the same - context as the function was orignally called in. - -The main point of this scheme is that everywhere that sets the logcontext is -responsible for clearing it before returning control to the reactor. 
- -So, for example, if you were the function which started a ``with -LoggingContext`` block, you wouldn't ``yield`` within it — instead you'd start -off the background process, and then leave the ``with`` block to wait for it: - -.. code:: python - - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - d = do_request_handling() - - def cb(r): - logger.debug("finished") - - d.addCallback(cb) - return d - -(in general, mixing ``with LoggingContext`` blocks and -``defer.inlineCallbacks`` in the same function leads to slighly -counter-intuitive code, under this scheme). - -Because we leave the original ``with`` block as soon as the Deferred is -returned (as opposed to waiting for it to be resolved, as we do today), the -logcontext is cleared before control passes back to the reactor; so if there is -some code within ``do_request_handling`` which needs to wait for a Deferred to -complete, there is no need for it to worry about clearing the logcontext before -doing so: - -.. code:: python - - def handle_request(): - r = do_some_stuff() - r.addCallback(do_some_more_stuff) - return r - -— and provided ``do_some_stuff`` follows the rules of returning a Deferred which -runs its callbacks in the original logcontext, all is happy. - -The business of a Deferred which runs its callbacks in the original logcontext -isn't hard to achieve — we have it today, in the shape of -``context._PreservingContextDeferred``: - -.. code:: python - - def do_some_stuff(): - deferred = do_some_io() - pcd = _PreservingContextDeferred(LoggingContext.current_context()) - deferred.chainDeferred(pcd) - return pcd - -It turns out that, thanks to the way that Deferreds chain together, we -automatically get the property of a context-preserving deferred with -``defer.inlineCallbacks``, provided the final Defered the function ``yields`` -on has that property. So we can just write: - -.. code:: python - - @defer.inlineCallbacks - def handle_request(): - yield do_some_stuff() - yield do_some_more_stuff() - -To conclude: I think this scheme would have worked equally well, with less -danger of messing it up, and probably made some more esoteric code easier to -write. But again — changing the conventions of the entire Synapse codebase is -not a sensible option for the marginal improvement offered. - - -A note on garbage-collection of Deferred chains ------------------------------------------------ - -It turns out that our logcontext rules do not play nicely with Deferred -chains which get orphaned and garbage-collected. - -Imagine we have some code that looks like this: - -.. code:: python - - listener_queue = [] - - def on_something_interesting(): - for d in listener_queue: - d.callback("foo") - - @defer.inlineCallbacks - def await_something_interesting(): - new_deferred = defer.Deferred() - listener_queue.append(new_deferred) - - with PreserveLoggingContext(): - yield new_deferred - -Obviously, the idea here is that we have a bunch of things which are waiting -for an event. (It's just an example of the problem here, but a relatively -common one.) - -Now let's imagine two further things happen. First of all, whatever was -waiting for the interesting thing goes away. (Perhaps the request times out, -or something *even more* interesting happens.) - -Secondly, let's suppose that we decide that the interesting thing is never -going to happen, and we reset the listener queue: - -.. 
code:: python - - def reset_listener_queue(): - listener_queue.clear() - -So, both ends of the deferred chain have now dropped their references, and the -deferred chain is now orphaned, and will be garbage-collected at some point. -Note that ``await_something_interesting`` is a generator function, and when -Python garbage-collects generator functions, it gives them a chance to clean -up by making the ``yield`` raise a ``GeneratorExit`` exception. In our case, -that means that the ``__exit__`` handler of ``PreserveLoggingContext`` will -carefully restore the request context, but there is now nothing waiting for -its return, so the request context is never cleared. - -To reiterate, this problem only arises when *both* ends of a deferred chain -are dropped. Dropping the the reference to a deferred you're supposed to be -calling is probably bad practice, so this doesn't actually happen too much. -Unfortunately, when it does happen, it will lead to leaked logcontexts which -are incredibly hard to track down. diff --git a/docs/media_repository.md b/docs/media_repository.md new file mode 100644 index 0000000000..1bf8f16f55 --- /dev/null +++ b/docs/media_repository.md @@ -0,0 +1,30 @@ +# Media Repository +
+*Synapse implementation-specific details for the media repository*
+
+The media repository is where attachments and avatar photos are stored.
+It stores attachment content and thumbnails for media uploaded by local users.
+It caches attachment content and thumbnails for media uploaded by remote users.
+
+## Storage
+
+Each item of media is assigned a `media_id` when it is uploaded.
+The `media_id` is a randomly chosen, URL safe 24 character string.
+
+Metadata such as the MIME type, upload time and length are stored in the
+sqlite3 database indexed by `media_id`.
+
+Content is stored on the filesystem under a `"local_content"` directory.
+
+Thumbnails are stored under a `"local_thumbnails"` directory.
+
+The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under
+`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width
+`128` and height `96` and type `"image/jpeg"` is stored under
+`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"`.
+
+Remote content is cached under a `"remote_content"` directory. Each item of
+remote content is assigned a local `"filesystem_id"` to ensure that the
+directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"`
+is appropriate. Thumbnails for remote content are stored under
+`"remote_thumbnails/server_name/..."`.
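
The mapping from IDs to paths described above can be summarised with a
short sketch. This is purely illustrative code, not Synapse's actual
implementation; `media_store_root` is an assumed name for the media
storage directory:

```python
import os

def local_content_path(media_store_root, media_id):
    # the first two pairs of characters of the media_id become nested
    # directories: "aabbcccc..." -> "aa/bb/cccc..."
    return os.path.join(
        media_store_root, "local_content",
        media_id[0:2], media_id[2:4], media_id[4:],
    )

def local_thumbnail_path(media_store_root, media_id, width, height, content_type):
    # thumbnails are named <width>-<height>-<type>, with the "/" in the
    # MIME type replaced: (128, 96, "image/jpeg") -> "128-96-image-jpeg"
    file_name = "%i-%i-%s" % (width, height, content_type.replace("/", "-"))
    return os.path.join(
        media_store_root, "local_thumbnails",
        media_id[0:2], media_id[2:4], media_id[4:], file_name,
    )

# local_content_path("/media_store", "aabbccccccccdddddddddddd")
#   == "/media_store/local_content/aa/bb/ccccccccdddddddddddd"
```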
diff --git a/docs/media_repository.rst b/docs/media_repository.rst deleted file mode 100644 index 1037b5be63..0000000000 --- a/docs/media_repository.rst +++ /dev/null @@ -1,27 +0,0 @@ -Media Repository -================ - -*Synapse implementation-specific details for the media repository* - -The media repository is where attachments and avatar photos are stored. -It stores attachment content and thumbnails for media uploaded by local users. -It caches attachment content and thumbnails for media uploaded by remote users. - -Storage ------- - -Each item of media is assigned a ``media_id`` when it is uploaded. -The ``media_id`` is a randomly chosen, URL safe 24 character string. -Metadata such as the MIME type, upload time and length are stored in the -sqlite3 database indexed by ``media_id``. -Content is stored on the filesystem under a ``"local_content"`` directory. -Thumbnails are stored under a ``"local_thumbnails"`` directory. -The item with ``media_id`` ``"aabbccccccccdddddddddddd"`` is stored under -``"local_content/aa/bb/ccccccccdddddddddddd"``. Its thumbnail with width -``128`` and height ``96`` and type ``"image/jpeg"`` is stored under -``"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"`` -Remote content is cached under ``"remote_content"`` directory. Each item of -remote content is assigned a local "``filesystem_id``" to ensure that the -directory structure ``"remote_content/server_name/aa/bb/ccccccccdddddddddddd"`` -is appropriate. Thumbnails for remote content are stored under -``"remote_thumbnails/server_name/..."`` diff --git a/docs/metrics-howto.md b/docs/metrics-howto.md new file mode 100644 index 0000000000..32abb9f44e --- /dev/null +++ b/docs/metrics-howto.md @@ -0,0 +1,217 @@ +# How to monitor Synapse metrics using Prometheus +
+1. Install Prometheus:
+
+ Follow instructions at
+ <http://prometheus.io/docs/introduction/install/>
+
+1. Enable Synapse metrics:
+
+ There are two methods of enabling metrics in Synapse.
+
+ The first serves the metrics as a part of the usual web server and
+ can be enabled by adding the "metrics" resource to the existing
+ listener as such:
+
+ resources:
+ - names:
+ - client
+ - metrics
+
+ This provides a simple way of adding metrics to your Synapse
+ installation, and serves under `/_synapse/metrics`. If you do not
+ wish your metrics to be publicly exposed, you will need to either
+ filter it out at your load balancer, or use the second method.
+
+ The second method runs the metrics server on a different port, in a
+ different thread to Synapse. This can make it more resilient to
+ heavy load, under which metrics might otherwise not be retrievable,
+ and makes it easier to expose the metrics to internal networks only.
+ The served metrics are available over HTTP only, and will be
+ available at `/`.
+
+ Add a new listener to homeserver.yaml:
+
+ listeners:
+ - type: metrics
+ port: 9000
+ bind_addresses:
+ - '0.0.0.0'
+
+ For both options, you will need to ensure that `enable_metrics` is
+ set to `True`.
+
+1. Restart Synapse.
+
+1. Add a Prometheus target for Synapse.
+
+ The target needs to set `metrics_path` to a non-default value (under
+ `scrape_configs`):
+
+ - job_name: "synapse"
+ metrics_path: "/_synapse/metrics"
+ static_configs:
+ - targets: ["my.server.here:port"]
+
+ where `my.server.here` is the IP address of Synapse, and `port` is
+ the listener port configured with the `metrics` resource.
+
+ If your Prometheus is older than 1.5.2, you will need to replace
+ `static_configs` in the above with `target_groups`.
+
+1. Restart Prometheus.
+
+## Renaming of metrics & deprecation of old names in 1.2
+
+Synapse 1.2 updates the Prometheus metrics to match the naming
+convention of the upstream `prometheus_client`. The old names are
+considered deprecated and will be removed in a future version of
+Synapse.
+ +| New Name | Old Name | +| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- | +| python_gc_objects_collected_total | python_gc_objects_collected | +| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | +| python_gc_collections_total | python_gc_collections | +| process_cpu_seconds_total | process_cpu_seconds | +| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | +| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | +| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | +| synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | +| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | +| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | +| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | +| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | +| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | +| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | +| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | +| synapse_background_process_start_count_total | synapse_background_process_start_count | +| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | +| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | +| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | +| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | +| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | +| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | +| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | +| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | +| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | +| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | +| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | +| synapse_federation_server_received_edus_total | synapse_federation_server_received_edus | +| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | +| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | +| synapse_handler_presence_presence_updates_total | synapse_handler_presence_presence_updates | +| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | +| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | +| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | +| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | +| 
synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count |
+| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total |
+| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed |
+| synapse_notifier_notified_events_total | synapse_notifier_notified_events |
+| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter |
+| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter |
+| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed |
+| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed |
+| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed |
+| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed |
+
+## Removal of deprecated metrics & time-based counters becoming histograms in 0.31.0
+
+The duplicated metrics deprecated in Synapse 0.27.0 have been removed.
+
+All time duration-based metrics have been changed to be measured in
+seconds. This affects:
+
+| msec -> sec metrics |
+| -------------------------------------- |
+| python_gc_time |
+| python_twisted_reactor_tick_time |
+| synapse_storage_query_time |
+| synapse_storage_schedule_time |
+| synapse_storage_transaction_time |
+
+Several metrics have been changed to be histograms, which sort entries
+into buckets and allow better analysis. The following metrics are now
+histograms:
+
+| Altered metrics |
+| ------------------------------------------------ |
+| python_gc_time |
+| python_twisted_reactor_pending_calls |
+| python_twisted_reactor_tick_time |
+| synapse_http_server_response_time_seconds |
+| synapse_storage_query_time |
+| synapse_storage_schedule_time |
+| synapse_storage_transaction_time |
+
+## Block and response metrics renamed for 0.27.0
+
+Synapse 0.27.0 begins the process of rationalising the duplicate
+`*:count` metrics reported for the resource tracking of code blocks and
+HTTP requests.
+
+At the same time, the corresponding `*:total` metrics are being renamed,
+as the `:total` suffix no longer makes sense in the absence of a
+corresponding `:count` metric.
+
+To enable a graceful migration path, this release just adds new names
+for the metrics being renamed. A future release will remove the old
+ones.
+
+The following table shows the new metrics, and the old metrics which
+they are replacing.
| New name | Old name |
+| ------------------------------------------------------------- | ---------------------------------------------------------- |
+| synapse_util_metrics_block_count | synapse_util_metrics_block_timer:count |
+| synapse_util_metrics_block_count | synapse_util_metrics_block_ru_utime:count |
+| synapse_util_metrics_block_count | synapse_util_metrics_block_ru_stime:count |
+| synapse_util_metrics_block_count | synapse_util_metrics_block_db_txn_count:count |
+| synapse_util_metrics_block_count | synapse_util_metrics_block_db_txn_duration:count |
+| synapse_util_metrics_block_time_seconds | synapse_util_metrics_block_timer:total |
+| synapse_util_metrics_block_ru_utime_seconds | synapse_util_metrics_block_ru_utime:total |
+| synapse_util_metrics_block_ru_stime_seconds | synapse_util_metrics_block_ru_stime:total |
+| synapse_util_metrics_block_db_txn_count | synapse_util_metrics_block_db_txn_count:total |
+| synapse_util_metrics_block_db_txn_duration_seconds | synapse_util_metrics_block_db_txn_duration:total |
+| synapse_http_server_response_count | synapse_http_server_requests |
+| synapse_http_server_response_count | synapse_http_server_response_time:count |
+| synapse_http_server_response_count | synapse_http_server_response_ru_utime:count |
+| synapse_http_server_response_count | synapse_http_server_response_ru_stime:count |
+| synapse_http_server_response_count | synapse_http_server_response_db_txn_count:count |
+| synapse_http_server_response_count | synapse_http_server_response_db_txn_duration:count |
+| synapse_http_server_response_time_seconds | synapse_http_server_response_time:total |
+| synapse_http_server_response_ru_utime_seconds | synapse_http_server_response_ru_utime:total |
+| synapse_http_server_response_ru_stime_seconds | synapse_http_server_response_ru_stime:total |
+| synapse_http_server_response_db_txn_count | synapse_http_server_response_db_txn_count:total |
+| synapse_http_server_response_db_txn_duration_seconds | synapse_http_server_response_db_txn_duration:total |
+
+## Standard Metric Names
+
+As of Synapse version 0.18.2, the format of the process-wide metrics has
+been changed to fit Prometheus standard naming conventions. Additionally
+the units have been changed to seconds, from milliseconds.
+
+| New name | Old name |
+| ---------------------------------------- | --------------------------------- |
+| process_cpu_user_seconds_total | process_resource_utime / 1000 |
+| process_cpu_system_seconds_total | process_resource_stime / 1000 |
+| process_open_fds (no 'type' label) | process_fds |
+
+The python-specific counts of garbage collector performance have been
+renamed.
+
+| New name | Old name |
+| -------------------------------- | -------------------------- |
+| python_gc_time | reactor_gc_time |
+| python_gc_unreachable_total | reactor_gc_unreachable |
+| python_gc_counts | reactor_gc_counts |
+
+The twisted-specific reactor metrics have been renamed.
+
+| New name | Old name |
+| -------------------------------------- | ----------------------- |
+| python_twisted_reactor_pending_calls | reactor_pending_calls |
+| python_twisted_reactor_tick_time | reactor_tick_time | diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst deleted file mode 100644 index 973641f3dc..0000000000 --- a/docs/metrics-howto.rst +++ /dev/null @@ -1,285 +0,0 @@ -How to monitor Synapse metrics using Prometheus -=============================================== - -1.
Install Prometheus: - - Follow instructions at http://prometheus.io/docs/introduction/install/ - -2. Enable Synapse metrics: - - There are two methods of enabling metrics in Synapse. - - The first serves the metrics as a part of the usual web server and can be - enabled by adding the "metrics" resource to the existing listener as such:: - - resources: - - names: - - client - - metrics - - This provides a simple way of adding metrics to your Synapse installation, - and serves under ``/_synapse/metrics``. If you do not wish your metrics be - publicly exposed, you will need to either filter it out at your load - balancer, or use the second method. - - The second method runs the metrics server on a different port, in a - different thread to Synapse. This can make it more resilient to heavy load - meaning metrics cannot be retrieved, and can be exposed to just internal - networks easier. The served metrics are available over HTTP only, and will - be available at ``/``. - - Add a new listener to homeserver.yaml:: - - listeners: - - type: metrics - port: 9000 - bind_addresses: - - '0.0.0.0' - - For both options, you will need to ensure that ``enable_metrics`` is set to - ``True``. - - Restart Synapse. - -3. Add a Prometheus target for Synapse. - - It needs to set the ``metrics_path`` to a non-default value (under ``scrape_configs``):: - - - job_name: "synapse" - metrics_path: "/_synapse/metrics" - static_configs: - - targets: ["my.server.here:port"] - - where ``my.server.here`` is the IP address of Synapse, and ``port`` is the listener port - configured with the ``metrics`` resource. - - If your prometheus is older than 1.5.2, you will need to replace - ``static_configs`` in the above with ``target_groups``. - - Restart Prometheus. - - -Renaming of metrics & deprecation of old names in 1.2 ------------------------------------------------------ - -Synapse 1.2 updates the Prometheus metrics to match the naming convention of the -upstream ``prometheus_client``. The old names are considered deprecated and will -be removed in a future version of Synapse. 
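While both names are exported during the deprecation window, dashboards can be migrated gradually. As a quick sanity check that a deprecated name and its replacement report the same series, you can query the Prometheus HTTP API for each. This is an illustrative sketch only; the Prometheus address is an assumption, and the metric pair is one example from the rename table:

```python
import requests

# Hypothetical Prometheus instance scraping this Synapse.
PROM_QUERY_URL = "http://localhost:9090/api/v1/query"

for name in (
    "synapse_federation_client_sent_transactions_total",  # new name
    "synapse_federation_client_sent_transactions",        # deprecated name
):
    # The standard Prometheus instant-query endpoint returns a JSON vector.
    resp = requests.get(PROM_QUERY_URL, params={"query": name}).json()
    print(name, "->", len(resp["data"]["result"]), "series")
```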
- -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| New Name | Old Name | -+=============================================================================+=======================================================================+ -| python_gc_objects_collected_total | python_gc_objects_collected | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| python_gc_collections_total | python_gc_collections | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| process_cpu_seconds_total | process_cpu_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | 
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_start_count_total | synapse_background_process_start_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_server_received_edus_total | 
synapse_federation_server_received_edus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_presence_updates_total | synapse_handler_presence_presence_updates | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_notifier_notified_events_total | synapse_notifier_notified_events | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter | 
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ - - -Removal of deprecated metrics & time based counters becoming histograms in 0.31.0 ---------------------------------------------------------------------------------- - -The duplicated metrics deprecated in Synapse 0.27.0 have been removed. - -All time duration-based metrics have been changed to be seconds. This affects: - -+----------------------------------+ -| msec -> sec metrics | -+==================================+ -| python_gc_time | -+----------------------------------+ -| python_twisted_reactor_tick_time | -+----------------------------------+ -| synapse_storage_query_time | -+----------------------------------+ -| synapse_storage_schedule_time | -+----------------------------------+ -| synapse_storage_transaction_time | -+----------------------------------+ - -Several metrics have been changed to be histograms, which sort entries into -buckets and allow better analysis. The following metrics are now histograms: - -+-------------------------------------------+ -| Altered metrics | -+===========================================+ -| python_gc_time | -+-------------------------------------------+ -| python_twisted_reactor_pending_calls | -+-------------------------------------------+ -| python_twisted_reactor_tick_time | -+-------------------------------------------+ -| synapse_http_server_response_time_seconds | -+-------------------------------------------+ -| synapse_storage_query_time | -+-------------------------------------------+ -| synapse_storage_schedule_time | -+-------------------------------------------+ -| synapse_storage_transaction_time | -+-------------------------------------------+ - - -Block and response metrics renamed for 0.27.0 ---------------------------------------------- - -Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count`` -metrics reported for the resource tracking for code blocks and HTTP requests. - -At the same time, the corresponding ``*:total`` metrics are being renamed, as -the ``:total`` suffix no longer makes sense in the absence of a corresponding -``:count`` metric. - -To enable a graceful migration path, this release just adds new names for the -metrics being renamed. A future release will remove the old ones. - -The following table shows the new metrics, and the old metrics which they are -replacing. 
- -==================================================== =================================================== -New name Old name -==================================================== =================================================== -synapse_util_metrics_block_count synapse_util_metrics_block_timer:count -synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count -synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count -synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count -synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count - -synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total -synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total -synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total -synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total -synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total - -synapse_http_server_response_count synapse_http_server_requests -synapse_http_server_response_count synapse_http_server_response_time:count -synapse_http_server_response_count synapse_http_server_response_ru_utime:count -synapse_http_server_response_count synapse_http_server_response_ru_stime:count -synapse_http_server_response_count synapse_http_server_response_db_txn_count:count -synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count - -synapse_http_server_response_time_seconds synapse_http_server_response_time:total -synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total -synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total -synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total -synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total -==================================================== =================================================== - - -Standard Metric Names ---------------------- - -As of synapse version 0.18.2, the format of the process-wide metrics has been -changed to fit prometheus standard naming conventions. Additionally the units -have been changed to seconds, from miliseconds. - -================================== ============================= -New name Old name -================================== ============================= -process_cpu_user_seconds_total process_resource_utime / 1000 -process_cpu_system_seconds_total process_resource_stime / 1000 -process_open_fds (no 'type' label) process_fds -================================== ============================= - -The python-specific counts of garbage collector performance have been renamed. - -=========================== ====================== -New name Old name -=========================== ====================== -python_gc_time reactor_gc_time -python_gc_unreachable_total reactor_gc_unreachable -python_gc_counts reactor_gc_counts -=========================== ====================== - -The twisted-specific reactor metrics have been renamed. 
- -==================================== ===================== -New name Old name -==================================== ===================== -python_twisted_reactor_pending_calls reactor_pending_calls -python_twisted_reactor_tick_time reactor_tick_time -==================================== ===================== diff --git a/docs/opentracing.md b/docs/opentracing.md new file mode 100644 index 0000000000..4c7a56a5d7 --- /dev/null +++ b/docs/opentracing.md @@ -0,0 +1,93 @@ +# OpenTracing + +## Background + +OpenTracing is a semi-standard being adopted by a number of distributed +tracing platforms. It is a common api for facilitating vendor-agnostic +tracing instrumentation. That is, we can use the OpenTracing api and +select one of a number of tracer implementations to do the heavy lifting +in the background. Our current selected implementation is Jaeger. + +OpenTracing is a tool which gives an insight into the causal +relationship of work done in and between servers. The servers each track +events and report them to a centralised server - in Synapse's case: +Jaeger. The basic unit used to represent events is the span. The span +roughly represents a single piece of work that was done and the time at +which it occurred. A span can have child spans, meaning that the work of +the child had to be completed for the parent span to complete, or it can +have follow-on spans which represent work that is undertaken as a result +of the parent but is not depended on by the parent in order to +finish. + +Since this is undertaken in a distributed environment, a request to +another server, such as an RPC or a simple GET, can be considered a span +(a unit of work) for the local server. This causal link is what +OpenTracing aims to capture and visualise. In order to do this, metadata +about the local server's span, i.e. the 'span context', needs to be +included with the request to the remote. + +It is up to the remote server to decide what it does with the spans it +creates. This is called the sampling policy and it can be configured +through Jaeger's settings. + +For OpenTracing concepts see +<https://opentracing.io/docs/overview/what-is-tracing/>. + +For more information about Jaeger's implementation see +<https://www.jaegertracing.io/docs/> + +## Setting up OpenTracing + +To receive OpenTracing spans, start up a Jaeger server. This can be done +using docker like so: + +```sh +docker run -d --name jaeger \ + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 14268:14268 \ + jaegertracing/all-in-one:1.13 +``` + +Latest documentation is probably at +<https://www.jaegertracing.io/docs/1.13/getting-started/> + +## Enable OpenTracing in Synapse + +OpenTracing is not enabled by default. It must be enabled in the +homeserver config by uncommenting the config options under `opentracing` +as shown in the [sample config](./sample_config.yaml). For example: + +```yaml +opentracing: + tracer_enabled: true + homeserver_whitelist: + - "mytrustedhomeserver.org" + - "*.myotherhomeservers.com" +``` + +## Homeserver whitelisting + +The homeserver whitelist is configured using regular expressions. A list +of regular expressions can be given and their union will be compared +when propagating any span contexts to another homeserver. + +Though it's mostly safe to send and receive span contexts to and from +untrusted users, since span contexts are usually opaque ids, it can lead +to two problems, namely: + +- If the span context is marked as sampled by the sending homeserver, + the receiver will sample it. Therefore two homeservers with wildly + different sampling policies could incur higher sampling counts than + intended.
+- Sending servers can attach arbitrary data to spans, known as + 'baggage'. For safety this has been disabled in Synapse but that + doesn't prevent another server sending you baggage which will be + logged to OpenTracing's logs. + +## Configuring Jaeger + +Sampling strategies can be set as in this document: +<https://www.jaegertracing.io/docs/1.13/sampling/> diff --git a/docs/opentracing.rst b/docs/opentracing.rst deleted file mode 100644 index 6e98ab56ba..0000000000 --- a/docs/opentracing.rst +++ /dev/null @@ -1,123 +0,0 @@ -=========== -OpenTracing -=========== - -Background ---------- - -OpenTracing is a semi-standard being adopted by a number of distributed tracing -platforms. It is a common api for facilitating vendor-agnostic tracing -instrumentation. That is, we can use the OpenTracing api and select one of a -number of tracer implementations to do the heavy lifting in the background. -Our current selected implementation is Jaeger. - -OpenTracing is a tool which gives an insight into the causal relationship of -work done in and between servers. The servers each track events and report them -to a centralised server - in Synapse's case: Jaeger. The basic unit used to -represent events is the span. The span roughly represents a single piece of work -that was done and the time at which it occurred. A span can have child spans, -meaning that the work of the child had to be completed for the parent span to -complete, or it can have follow-on spans which represent work that is undertaken -as a result of the parent but is not depended on by the parent to in order to -finish. - -Since this is undertaken in a distributed environment a request to another -server, such as an RPC or a simple GET, can be considered a span (a unit or -work) for the local server. This causal link is what OpenTracing aims to -capture and visualise. In order to do this metadata about the local server's -span, i.e the 'span context', needs to be included with the request to the -remote. - -It is up to the remote server to decide what it does with the spans -it creates. This is called the sampling policy and it can be configured -through Jaeger's settings. - -For OpenTracing concepts see -https://opentracing.io/docs/overview/what-is-tracing/. - -For more information about Jaeger's implementation see -https://www.jaegertracing.io/docs/ - -===================== -Seting up OpenTracing -===================== - -To receive OpenTracing spans, start up a Jaeger server. This can be done -using docker like so: - -.. code-block:: bash - - docker run -d --name jaeger - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p 16686:16686 \ - -p 14268:14268 \ - jaegertracing/all-in-one:1.13 - -Latest documentation is probably at -https://www.jaegertracing.io/docs/1.13/getting-started/ - - -Enable OpenTracing in Synapse ------------------------------ - -OpenTracing is not enabled by default. It must be enabled in the homeserver -config by uncommenting the config options under ``opentracing`` as shown in -the `sample config <./sample_config.yaml>`_. For example: - -.. code-block:: yaml - - opentracing: - tracer_enabled: true - homeserver_whitelist: - - "mytrustedhomeserver.org" - - "*.myotherhomeservers.com" - -Homeserver whitelisting ------------------------ - -The homeserver whitelist is configured using regular expressions. A list of regular -expressions can be given and their union will be compared when propagating any -spans contexts to another homeserver.
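To make the span and span-context model described in the Background section above concrete, here is a minimal, illustrative sketch using the `jaeger_client` and `opentracing` packages. The service name and sampler settings are placeholders, not Synapse's own tracer configuration:

```python
from jaeger_client import Config
from opentracing.propagation import Format

# Illustrative tracer setup; Synapse configures its own tracer internally.
config = Config(
    config={"sampler": {"type": "const", "param": 1}, "logging": True},
    service_name="opentracing-demo",
    validate=True,
)
tracer = config.initialize_tracer()

with tracer.start_span("handle_request") as parent:
    # A child span: the parent cannot finish until this work is done.
    with tracer.start_span("db_lookup", child_of=parent):
        pass

    # Inject the span context into a carrier dict, as a homeserver would
    # when propagating the context to a remote server.
    carrier = {}
    tracer.inject(parent.context, Format.TEXT_MAP, carrier)
    print(carrier)

tracer.close()
```

With Jaeger the carrier will contain an `uber-trace-id` entry, the same kind of opaque identifier that Synapse propagates to remote homeservers.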
- -Though it's mostly safe to send and receive span contexts to and from -untrusted users since span contexts are usually opaque ids it can lead to -two problems, namely: - -- If the span context is marked as sampled by the sending homeserver the receiver will - sample it. Therefore two homeservers with wildly different sampling policies - could incur higher sampling counts than intended. -- Sending servers can attach arbitrary data to spans, known as 'baggage'. For safety this has been disabled in Synapse - but that doesn't prevent another server sending you baggage which will be logged - to OpenTracing's logs. - -========== -EDU FORMAT -========== - -EDUs can contain tracing data in their content. This is not specced but -it could be of interest for other homeservers. - -EDU format (if you're using jaeger): - -.. code-block:: json - - { - "edu_type": "type", - "content": { - "org.matrix.opentracing_context": { - "uber-trace-id": "fe57cf3e65083289" - } - } - } - -Though you don't have to use jaeger you must inject the span context into -`org.matrix.opentracing_context` using the opentracing `Format.TEXT_MAP` inject method. - -================== -Configuring Jaeger -================== - -Sampling strategies can be set as in this document: -https://www.jaegertracing.io/docs/1.13/sampling/ diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md new file mode 100644 index 0000000000..0db1a3804a --- /dev/null +++ b/docs/password_auth_providers.md @@ -0,0 +1,116 @@ +# Password auth provider modules + +Password auth providers offer a way for server administrators to +integrate their Synapse installation with an existing authentication +system. + +A password auth provider is a Python class which is dynamically loaded +into Synapse, and provides a number of methods by which it can integrate +with the authentication system. + +This document serves as a reference for those looking to implement their +own password auth providers. + +## Required methods + +Password auth provider classes must provide the following methods: + +*class* `SomeProvider.parse_config`(*config*) + +> This method is passed the `config` object for this module from the +> homeserver configuration file. +> +> It should perform any appropriate sanity checks on the provided +> configuration, and return an object which is then passed into +> `__init__`. + +*class* `SomeProvider`(*config*, *account_handler*) + +> The constructor is passed the config object returned by +> `parse_config`, and a `synapse.module_api.ModuleApi` object which +> allows the password provider to check if accounts exist and/or create +> new ones. + +## Optional methods + +Password auth provider classes may optionally provide the following +methods. + +*class* `SomeProvider.get_db_schema_files`() + +> This method, if implemented, should return an Iterable of +> `(name, stream)` pairs of database schema files. Each file is applied +> in turn at initialisation, and a record is then made in the database +> so that it is not re-applied on the next start. + +`someprovider.get_supported_login_types`() + +> This method, if implemented, should return a `dict` mapping from a +> login type identifier (such as `m.login.password`) to an iterable +> giving the fields which must be provided by the user in the submission +> to the `/login` api. These fields are passed in the `login_dict` +> dictionary to `check_auth`. 
+> +> For example, if a password auth provider wants to implement a custom +> login type of `com.example.custom_login`, where the client is expected +> to pass the fields `secret1` and `secret2`, the provider should +> implement this method and return the following dict: +> +> {"com.example.custom_login": ("secret1", "secret2")} + +`someprovider.check_auth`(*username*, *login_type*, *login_dict*) + +> This method is the one that does the real work. If implemented, it +> will be called for each login attempt where the login type matches one +> of the keys returned by `get_supported_login_types`. +> +> It is passed the (possibly UNqualified) `user` provided by the client, +> the login type, and a dictionary of login secrets passed by the +> client. +> +> The method should return a Twisted `Deferred` object, which resolves +> to the canonical `@localpart:domain` user id if authentication is +> successful, and `None` if not. +> +> Alternatively, the `Deferred` can resolve to a `(str, func)` tuple, in +> which case the second field is a callback which will be called with +> the result from the `/login` call (including `access_token`, +> `device_id`, etc.) + +`someprovider.check_3pid_auth`(*medium*, *address*, *password*) + +> This method, if implemented, is called when a user attempts to +> register or log in with a third party identifier, such as email. It is +> passed the medium (ex. "email"), an address (ex. +> "jdoe@example.com") and the user's password. +> +> The method should return a Twisted `Deferred` object, which resolves +> to a `str` containing the user's (canonical) User ID if +> authentication was successful, and `None` if not. +> +> As with `check_auth`, the `Deferred` may alternatively resolve to a +> `(user_id, callback)` tuple. + +`someprovider.check_password`(*user_id*, *password*) + +> This method provides a simpler interface than +> `get_supported_login_types` and `check_auth` for password auth +> providers that just want to provide a mechanism for validating +> `m.login.password` logins. +> +> If implemented, it will be called to check logins with an +> `m.login.password` login type. It is passed a qualified +> `@localpart:domain` user id, and the password provided by the user. +> +> The method should return a Twisted `Deferred` object, which resolves +> to `True` if authentication is successful, and `False` if not. + +`someprovider.on_logged_out`(*user_id*, *device_id*, *access_token*) + +> This method, if implemented, is called when a user logs out. It is +> passed the qualified user ID, the ID of the deactivated device (if +> any: access tokens are occasionally created without an associated +> device ID), and the (now deactivated) access token. +> +> It may return a Twisted `Deferred` object; the logout request will +> wait for the deferred to complete but the result is ignored. diff --git a/docs/password_auth_providers.rst b/docs/password_auth_providers.rst deleted file mode 100644 index 6149ba7458..0000000000 --- a/docs/password_auth_providers.rst +++ /dev/null @@ -1,113 +0,0 @@ -Password auth provider modules -============================== - -Password auth providers offer a way for server administrators to integrate -their Synapse installation with an existing authentication system. - -A password auth provider is a Python class which is dynamically loaded into -Synapse, and provides a number of methods by which it can integrate with the -authentication system. - -This document serves as a reference for those looking to implement their own -password auth providers.
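To tie the reference above together, here is a minimal, hypothetical provider implementing `parse_config`, the constructor, and `check_password`. The static user map is purely illustrative, and only `ModuleApi.check_user_exists` is assumed from the module API:

```python
from twisted.internet import defer


class DictPasswordProvider:
    """Hypothetical provider: checks passwords against a static map."""

    @staticmethod
    def parse_config(config):
        # Sanity-check this module's section of the homeserver config.
        if not isinstance(config, dict):
            raise Exception("expected a mapping of localpart -> password")
        return config

    def __init__(self, config, account_handler):
        # account_handler is a synapse.module_api.ModuleApi instance.
        self.account_handler = account_handler
        self.users = config

    @defer.inlineCallbacks
    def check_password(self, user_id, password):
        # user_id is fully qualified, e.g. "@alice:example.com".
        localpart = user_id.split(":", 1)[0][1:]
        if self.users.get(localpart) != password:
            defer.returnValue(False)
        # Only allow logins for accounts that already exist.
        exists = yield self.account_handler.check_user_exists(user_id)
        defer.returnValue(bool(exists))
```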
- -Required methods ----------------- - -Password auth provider classes must provide the following methods: - -*class* ``SomeProvider.parse_config``\(*config*) - - This method is passed the ``config`` object for this module from the - homeserver configuration file. - - It should perform any appropriate sanity checks on the provided - configuration, and return an object which is then passed into ``__init__``. - -*class* ``SomeProvider``\(*config*, *account_handler*) - - The constructor is passed the config object returned by ``parse_config``, - and a ``synapse.module_api.ModuleApi`` object which allows the - password provider to check if accounts exist and/or create new ones. - -Optional methods ----------------- - -Password auth provider classes may optionally provide the following methods. - -*class* ``SomeProvider.get_db_schema_files``\() - - This method, if implemented, should return an Iterable of ``(name, - stream)`` pairs of database schema files. Each file is applied in turn at - initialisation, and a record is then made in the database so that it is - not re-applied on the next start. - -``someprovider.get_supported_login_types``\() - - This method, if implemented, should return a ``dict`` mapping from a login - type identifier (such as ``m.login.password``) to an iterable giving the - fields which must be provided by the user in the submission to the - ``/login`` api. These fields are passed in the ``login_dict`` dictionary - to ``check_auth``. - - For example, if a password auth provider wants to implement a custom login - type of ``com.example.custom_login``, where the client is expected to pass - the fields ``secret1`` and ``secret2``, the provider should implement this - method and return the following dict:: - - {"com.example.custom_login": ("secret1", "secret2")} - -``someprovider.check_auth``\(*username*, *login_type*, *login_dict*) - - This method is the one that does the real work. If implemented, it will be - called for each login attempt where the login type matches one of the keys - returned by ``get_supported_login_types``. - - It is passed the (possibly UNqualified) ``user`` provided by the client, - the login type, and a dictionary of login secrets passed by the client. - - The method should return a Twisted ``Deferred`` object, which resolves to - the canonical ``@localpart:domain`` user id if authentication is successful, - and ``None`` if not. - - Alternatively, the ``Deferred`` can resolve to a ``(str, func)`` tuple, in - which case the second field is a callback which will be called with the - result from the ``/login`` call (including ``access_token``, ``device_id``, - etc.) - -``someprovider.check_3pid_auth``\(*medium*, *address*, *password*) - - This method, if implemented, is called when a user attempts to register or - log in with a third party identifier, such as email. It is passed the - medium (ex. "email"), an address (ex. "jdoe@example.com") and the user's - password. - - The method should return a Twisted ``Deferred`` object, which resolves to - a ``str`` containing the user's (canonical) User ID if authentication was - successful, and ``None`` if not. - - As with ``check_auth``, the ``Deferred`` may alternatively resolve to a - ``(user_id, callback)`` tuple. - -``someprovider.check_password``\(*user_id*, *password*) - - This method provides a simpler interface than ``get_supported_login_types`` - and ``check_auth`` for password auth providers that just want to provide a - mechanism for validating ``m.login.password`` logins. 
- - Iif implemented, it will be called to check logins with an - ``m.login.password`` login type. It is passed a qualified - ``@localpart:domain`` user id, and the password provided by the user. - - The method should return a Twisted ``Deferred`` object, which resolves to - ``True`` if authentication is successful, and ``False`` if not. - -``someprovider.on_logged_out``\(*user_id*, *device_id*, *access_token*) - - This method, if implemented, is called when a user logs out. It is passed - the qualified user ID, the ID of the deactivated device (if any: access - tokens are occasionally created without an associated device ID), and the - (now deactivated) access token. - - It may return a Twisted ``Deferred`` object; the logout request will wait - for the deferred to complete but the result is ignored. diff --git a/docs/postgres.md b/docs/postgres.md new file mode 100644 index 0000000000..29cf762858 --- /dev/null +++ b/docs/postgres.md @@ -0,0 +1,164 @@ +# Using Postgres + +Postgres version 9.5 or later is known to work. + +## Install postgres client libraries + +Synapse will require the python postgres client library in order to +connect to a postgres database. + +- If you are using the [matrix.org debian/ubuntu + packages](../INSTALL.md#matrixorg-packages), the necessary python + library will already be installed, but you will need to ensure the + low-level postgres library is installed, which you can do with + `apt install libpq5`. +- For other pre-built packages, please consult the documentation from + the relevant package. +- If you installed synapse [in a + virtualenv](../INSTALL.md#installing-from-source), you can install + the library with: + + ~/synapse/env/bin/pip install matrix-synapse[postgres] + + (substituting the path to your virtualenv for `~/synapse/env`, if + you used a different path). You will require the postgres + development files. These are in the `libpq-dev` package on + Debian-derived distributions. + +## Set up database + +Assuming your PostgreSQL database user is called `postgres`, create a +user `synapse_user` with: + + su - postgres + createuser --pwprompt synapse_user + +Before you can authenticate with the `synapse_user`, you must create a +database that it can access. To create a database, first connect to the +database with your database user: + + su - postgres + psql + +and then run: + + CREATE DATABASE synapse + ENCODING 'UTF8' + LC_COLLATE='C' + LC_CTYPE='C' + template=template0 + OWNER synapse_user; + +This would create an appropriate database named `synapse` owned by the +`synapse_user` user (which must already have been created as above). + +Note that the PostgreSQL database *must* have the correct encoding set +(as shown above), otherwise it will not be able to store UTF8 strings. + +You may need to enable password authentication so `synapse_user` can +connect to the database. See +<https://www.postgresql.org/docs/11/auth-pg-hba-conf.html>. + +## Tuning Postgres + +The default settings should be fine for most deployments. For larger +scale deployments tuning some of the settings is recommended, details of +which can be found at +<https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server>. + +In particular, we've found tuning the following values helpful for +performance: + +- `shared_buffers` +- `effective_cache_size` +- `work_mem` +- `maintenance_work_mem` +- `autovacuum_work_mem` + +Note that the appropriate values for those fields depend on the amount +of free memory the database host has available.
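Before wiring Synapse up to the new database, it can be worth confirming that `synapse_user` can connect and that the encoding settings took effect. A quick sketch using `psycopg2` (the credentials are placeholders for whatever you created above):

```python
import psycopg2

# Placeholder credentials matching the setup steps above.
conn = psycopg2.connect(
    user="synapse_user",
    password="itsasecret",
    dbname="synapse",
    host="localhost",
)
with conn.cursor() as cur:
    cur.execute("SHOW SERVER_ENCODING")
    print(cur.fetchone())  # expected: ('UTF8',)
    cur.execute("SHOW LC_COLLATE")
    print(cur.fetchone())  # expected: ('C',)
conn.close()
```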
+ +## Synapse config + +When you are ready to start using PostgreSQL, edit the `database` +section in your config file to match the following lines: + + database: + name: psycopg2 + args: + user: <user> + password: <pass> + database: <db> + host: <host> + cp_min: 5 + cp_max: 10 + +All key/value pairs in `args` are passed to the `psycopg2.connect(..)` +function, except keys beginning with `cp_`, which are consumed by the +twisted adbapi connection pool. + +## Porting from SQLite + +### Overview + +The script `synapse_port_db` allows porting an existing synapse server +backed by SQLite to using PostgreSQL. This is done as a two-phase +process: + +1. Copy the existing SQLite database to a separate location (while the + server is down) and run the port script against that offline + database. +2. Shut down the server. Rerun the port script to port any data that + has come in since taking the first snapshot. Restart the server against + the PostgreSQL database. + +The port script is designed to be run repeatedly against newer snapshots +of the SQLite database file. This makes it safe to repeat step 1 if +there was a delay between taking the previous snapshot and being ready +to do step 2. + +It is safe to kill the port script at any time and restart it. + +### Using the port script + +Firstly, shut down the currently running synapse server and copy its +database file (typically `homeserver.db`) to another location. Once the +copy is complete, restart synapse. For instance: + + ./synctl stop + cp homeserver.db homeserver.db.snapshot + ./synctl start + +Copy the old config file into a new config file: + + cp homeserver.yaml homeserver-postgres.yaml + +Edit the database section as described in the section *Synapse config* +above and with the SQLite snapshot located at `homeserver.db.snapshot` +simply run: + + synapse_port_db --sqlite-database homeserver.db.snapshot \ + --postgres-config homeserver-postgres.yaml + +The flag `--curses` displays a coloured curses progress UI. + +If the script took a long time to complete, or time has otherwise passed +since the original snapshot was taken, repeat the previous steps with a +newer snapshot. + +To complete the conversion shut down the synapse server and run the port +script one last time, e.g. if the SQLite database is at `homeserver.db` +run: + + synapse_port_db --sqlite-database homeserver.db \ + --postgres-config homeserver-postgres.yaml + +Once that has completed, change the synapse config to point at the +PostgreSQL database configuration file `homeserver-postgres.yaml`: + + ./synctl stop + mv homeserver.yaml homeserver-old-sqlite.yaml + mv homeserver-postgres.yaml homeserver.yaml + ./synctl start + +Synapse should now be running against PostgreSQL. diff --git a/docs/postgres.rst b/docs/postgres.rst deleted file mode 100644 index e08a5116b9..0000000000 --- a/docs/postgres.rst +++ /dev/null @@ -1,166 +0,0 @@ -Using Postgres --------------- - -Postgres version 9.5 or later is known to work. - -Install postgres client libraries -================================= - -Synapse will require the python postgres client library in order to connect to -a postgres database. - -* If you are using the `matrix.org debian/ubuntu - packages <../INSTALL.md#matrixorg-packages>`_, - the necessary python library will already be installed, but you will need to - ensure the low-level postgres library is installed, which you can do with - ``apt install libpq5``. - -* For other pre-built packages, please consult the documentation from the - relevant package.
- -* If you installed synapse `in a virtualenv - <../INSTALL.md#installing-from-source>`_, you can install the library with:: - - ~/synapse/env/bin/pip install matrix-synapse[postgres] - - (substituting the path to your virtualenv for ``~/synapse/env``, if you used a - different path). You will require the postgres development files. These are in - the ``libpq-dev`` package on Debian-derived distributions. - -Set up database -=============== - -Assuming your PostgreSQL database user is called ``postgres``, create a user -``synapse_user`` with:: - - su - postgres - createuser --pwprompt synapse_user - -Before you can authenticate with the ``synapse_user``, you must create a -database that it can access. To create a database, first connect to the database -with your database user:: - - su - postgres - psql - -and then run:: - - CREATE DATABASE synapse - ENCODING 'UTF8' - LC_COLLATE='C' - LC_CTYPE='C' - template=template0 - OWNER synapse_user; - -This would create an appropriate database named ``synapse`` owned by the -``synapse_user`` user (which must already have been created as above). - -Note that the PostgreSQL database *must* have the correct encoding set (as -shown above), otherwise it will not be able to store UTF8 strings. - -You may need to enable password authentication so ``synapse_user`` can connect -to the database. See https://www.postgresql.org/docs/11/auth-pg-hba-conf.html. - -Tuning Postgres -=============== - -The default settings should be fine for most deployments. For larger scale -deployments tuning some of the settings is recommended, details of which can be -found at https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server. - -In particular, we've found tuning the following values helpful for performance: - -- ``shared_buffers`` -- ``effective_cache_size`` -- ``work_mem`` -- ``maintenance_work_mem`` -- ``autovacuum_work_mem`` - -Note that the appropriate values for those fields depend on the amount of free -memory the database host has available. - -Synapse config -============== - -When you are ready to start using PostgreSQL, edit the ``database`` section in -your config file to match the following lines:: - - database: - name: psycopg2 - args: - user: - password: - database: - host: - cp_min: 5 - cp_max: 10 - -All key, values in ``args`` are passed to the ``psycopg2.connect(..)`` -function, except keys beginning with ``cp_``, which are consumed by the twisted -adbapi connection pool. - - -Porting from SQLite -=================== - -Overview -~~~~~~~~ - -The script ``synapse_port_db`` allows porting an existing synapse server -backed by SQLite to using PostgreSQL. This is done in as a two phase process: - -1. Copy the existing SQLite database to a separate location (while the server - is down) and running the port script against that offline database. -2. Shut down the server. Rerun the port script to port any data that has come - in since taking the first snapshot. Restart server against the PostgreSQL - database. - -The port script is designed to be run repeatedly against newer snapshots of the -SQLite database file. This makes it safe to repeat step 1 if there was a delay -between taking the previous snapshot and being ready to do step 2. - -It is safe to at any time kill the port script and restart it. - -Using the port script -~~~~~~~~~~~~~~~~~~~~~ - -Firstly, shut down the currently running synapse server and copy its database -file (typically ``homeserver.db``) to another location. Once the copy is -complete, restart synapse. 
For instance:: - - ./synctl stop - cp homeserver.db homeserver.db.snapshot - ./synctl start - -Copy the old config file into a new config file:: - - cp homeserver.yaml homeserver-postgres.yaml - -Edit the database section as described in the section *Synapse config* above -and with the SQLite snapshot located at ``homeserver.db.snapshot`` simply run:: - - synapse_port_db --sqlite-database homeserver.db.snapshot \ - --postgres-config homeserver-postgres.yaml - -The flag ``--curses`` displays a coloured curses progress UI. - -If the script took a long time to complete, or time has otherwise passed since -the original snapshot was taken, repeat the previous steps with a newer -snapshot. - -To complete the conversion shut down the synapse server and run the port -script one last time, e.g. if the SQLite database is at ``homeserver.db`` -run:: - - synapse_port_db --sqlite-database homeserver.db \ - --postgres-config homeserver-postgres.yaml - -Once that has completed, change the synapse config to point at the PostgreSQL -database configuration file ``homeserver-postgres.yaml``:: - - ./synctl stop - mv homeserver.yaml homeserver-old-sqlite.yaml - mv homeserver-postgres.yaml homeserver.yaml - ./synctl start - -Synapse should now be running against PostgreSQL. diff --git a/docs/replication.md b/docs/replication.md new file mode 100644 index 0000000000..ed88233157 --- /dev/null +++ b/docs/replication.md @@ -0,0 +1,37 @@ +# Replication Architecture + +## Motivation + +We'd like to be able to split some of the work that synapse does into +multiple python processes. In theory multiple synapse processes could +share a single postgresql database and we'd scale up by running more +synapse processes. However, much of synapse assumes that only one process +is interacting with the database: for assigning unique identifiers +when inserting into tables, for notifying components about new updates, and +for invalidating its caches. + +So running multiple copies of the current code isn't an option. One way +to run multiple processes would be to have a single writer process and +multiple reader processes connected to the same database. In order to do +this we'd need a way for the reader process to invalidate its in-memory +caches when an update happens on the writer. One way to do this is for +the writer to present an append-only log of updates which the readers +can consume to invalidate their caches and to push updates to listening +clients or pushers. + +Synapse already stores much of its data as an append-only log so that it +can correctly respond to `/sync` requests, so the amount of code changes +needed to expose the append-only log to the readers should be fairly +minimal. + +## Architecture + +### The Replication Protocol + +See [tcp_replication.md](tcp_replication.md) + +### The Slaved DataStore + +There are read-only versions of the synapse storage layer in +`synapse/replication/slave/storage` that use the response of the +replication API to invalidate their caches. diff --git a/docs/replication.rst b/docs/replication.rst deleted file mode 100644 index 310abb3488..0000000000 --- a/docs/replication.rst +++ /dev/null @@ -1,40 +0,0 @@ -Replication Architecture -======================== - -Motivation ----------- - -We'd like to be able to split some of the work that synapse does into multiple -python processes. In theory multiple synapse processes could share a single -postgresql database and we'd scale up by running more synapse processes.
- -However much of synapse assumes that only one process is interacting with the -database, both for assigning unique identifiers when inserting into tables, -notifying components about new updates, and for invalidating its caches. - -So running multiple copies of the current code isn't an option. One way to -run multiple processes would be to have a single writer process and multiple -reader processes connected to the same database. In order to do this we'd need -a way for the reader process to invalidate its in-memory caches when an update -happens on the writer. One way to do this is for the writer to present an -append-only log of updates which the readers can consume to invalidate their -caches and to push updates to listening clients or pushers. - -Synapse already stores much of its data as an append-only log so that it can -correctly respond to /sync requests so the amount of code changes needed to -expose the append-only log to the readers should be fairly minimal. - -Architecture ------------- - -The Replication Protocol -~~~~~~~~~~~~~~~~~~~~~~~~ - -See ``tcp_replication.rst`` - - -The Slaved DataStore -~~~~~~~~~~~~~~~~~~~~ - -There are read-only version of the synapse storage layer in -``synapse/replication/slave/storage`` that use the response of the replication -API to invalidate their caches. diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md new file mode 100644 index 0000000000..dcfc5c64aa --- /dev/null +++ b/docs/reverse_proxy.md @@ -0,0 +1,123 @@ +# Using a reverse proxy with Synapse + +It is recommended to put a reverse proxy such as +[nginx](https://nginx.org/en/docs/http/ngx_http_proxy_module.html), +[Apache](https://httpd.apache.org/docs/current/mod/mod_proxy_http.html), +[Caddy](https://caddyserver.com/docs/proxy) or +[HAProxy](https://www.haproxy.org/) in front of Synapse. One advantage +of doing so is that you can expose the default https port +(443) to Matrix clients without needing to run Synapse with root +privileges. + +> **NOTE**: Your reverse proxy must not 'canonicalise' or 'normalise' +the requested URI in any way (for example, by decoding `%xx` escapes). +Beware that Apache *will* canonicalise URIs unless you specify +`nocanon`. + +When setting up a reverse proxy, remember that Matrix clients and other +Matrix servers do not necessarily need to connect to your server via the +same server name or port. Indeed, clients will use port 443 by default, +whereas servers default to port 8448. Where these are different, we +refer to the 'client port' and the 'federation port'. See [Setting +up federation](federate.md) for more details of the algorithm used for +federation connections. + +Let's assume that we expect clients to connect to our server at +`https://matrix.example.com`, and other servers to connect at +`https://example.com:8448`. The following sections detail the configuration of +the reverse proxy and the homeserver. + +## Webserver configuration examples + +> **NOTE**: You only need one of these.
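Whichever of the example configurations below you choose, it is worth verifying afterwards that requests reach Synapse and that `%xx` escapes survive the proxy. A rough check using the Python `requests` library, with the hostnames from the example above (valid TLS certificates are assumed; the room alias is made up, so a 404 is fine — the point is that the encoded characters arrive intact rather than producing an error from the proxy):

```python
import requests

# Client port: should return Synapse's supported spec versions.
r = requests.get("https://matrix.example.com/_matrix/client/versions", timeout=10)
print(r.status_code, r.json())

# Federation port: the (unauthenticated) server version endpoint.
r = requests.get("https://example.com:8448/_matrix/federation/v1/version", timeout=10)
print(r.status_code)

# Percent-encoded characters (here an encoded room alias "#room:example.com")
# must reach Synapse un-decoded; a proxy that canonicalises the URI will
# typically mangle this lookup instead of returning a clean 404.
r = requests.get(
    "https://matrix.example.com/_matrix/client/r0/directory/room/%23room%3Aexample.com",
    timeout=10,
)
print(r.status_code)
```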
+ +### nginx + + server { + listen 443 ssl; + listen [::]:443 ssl; + server_name matrix.example.com; + + location /_matrix { + proxy_pass http://localhost:8008; + proxy_set_header X-Forwarded-For $remote_addr; + } + } + + server { + listen 8448 ssl default_server; + listen [::]:8448 ssl default_server; + server_name example.com; + + location / { + proxy_pass http://localhost:8008; + proxy_set_header X-Forwarded-For $remote_addr; + } + } + +> **NOTE**: Do not add a `/` after the port in `proxy_pass`, otherwise nginx will +canonicalise/normalise the URI. + +### Caddy + + matrix.example.com { + proxy /_matrix http://localhost:8008 { + transparent + } + } + + example.com:8448 { + proxy / http://localhost:8008 { + transparent + } + } + +### Apache + + <VirtualHost *:443> + SSLEngine on + ServerName matrix.example.com + + AllowEncodedSlashes NoDecode + ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon + ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix + </VirtualHost> + + <VirtualHost *:8448> + SSLEngine on + ServerName example.com + + AllowEncodedSlashes NoDecode + ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon + ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix + </VirtualHost> + +> **NOTE**: ensure the `nocanon` options are included. + +### HAProxy + + frontend https + bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 + + # Matrix client traffic + acl matrix-host hdr(host) -i matrix.example.com + acl matrix-path path_beg /_matrix + + use_backend matrix if matrix-host matrix-path + + frontend matrix-federation + bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 + default_backend matrix + + backend matrix + server matrix 127.0.0.1:8008 + +## Homeserver Configuration + +You will also want to set `bind_addresses: ['127.0.0.1']` and +`x_forwarded: true` for port 8008 in `homeserver.yaml` to ensure that +client IP addresses are recorded correctly. + +Having done so, you can then use `https://matrix.example.com` (instead +of `https://matrix.example.com:8448`) as the "Custom server" when +connecting to Synapse from a client. diff --git a/docs/reverse_proxy.rst b/docs/reverse_proxy.rst deleted file mode 100644 index 4b640ffc4f..0000000000 --- a/docs/reverse_proxy.rst +++ /dev/null @@ -1,112 +0,0 @@ -Using a reverse proxy with Synapse -================================== - -It is recommended to put a reverse proxy such as -`nginx `_, -`Apache `_, -`Caddy `_ or -`HAProxy `_ in front of Synapse. One advantage of -doing so is that it means that you can expose the default https port (443) to -Matrix clients without needing to run Synapse with root privileges. - -**NOTE**: Your reverse proxy must not 'canonicalise' or 'normalise' the -requested URI in any way (for example, by decoding ``%xx`` escapes). Beware -that Apache *will* canonicalise URIs unless you specifify ``nocanon``. - -When setting up a reverse proxy, remember that Matrix clients and other Matrix -servers do not necessarily need to connect to your server via the same server -name or port. Indeed, clients will use port 443 by default, whereas servers -default to port 8448. Where these are different, we refer to the 'client port' -and the 'federation port'. See `Setting up federation -`_ for more details of the algorithm used for -federation connections. - -Let's assume that we expect clients to connect to our server at -``https://matrix.example.com``, and other servers to connect at -``https://example.com:8448``.
Here are some example configurations:
-
-* nginx::
-
-      server {
-          listen 443 ssl;
-          listen [::]:443 ssl;
-          server_name matrix.example.com;
-
-          location /_matrix {
-              proxy_pass http://localhost:8008;
-              proxy_set_header X-Forwarded-For $remote_addr;
-          }
-      }
-
-      server {
-          listen 8448 ssl default_server;
-          listen [::]:8448 ssl default_server;
-          server_name example.com;
-
-          location / {
-              proxy_pass http://localhost:8008;
-              proxy_set_header X-Forwarded-For $remote_addr;
-          }
-      }
-
-  Do not add a `/` after the port in `proxy_pass`, otherwise nginx will canonicalise/normalise the URI.
-
-* Caddy::
-
-      matrix.example.com {
-        proxy /_matrix http://localhost:8008 {
-          transparent
-        }
-      }
-
-      example.com:8448 {
-        proxy / http://localhost:8008 {
-          transparent
-        }
-      }
-
-* Apache (note the ``nocanon`` options here!)::
-
-      <VirtualHost *:443>
-          SSLEngine on
-          ServerName matrix.example.com;
-
-          AllowEncodedSlashes NoDecode
-          ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon
-          ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix
-      </VirtualHost>
-
-      <VirtualHost *:8448>
-          SSLEngine on
-          ServerName example.com;
-
-          AllowEncodedSlashes NoDecode
-          ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon
-          ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix
-      </VirtualHost>
-
-* HAProxy::
-
-      frontend https
-        bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1
-
-        # Matrix client traffic
-        acl matrix-host hdr(host) -i matrix.example.com
-        acl matrix-path path_beg /_matrix
-
-        use_backend matrix if matrix-host matrix-path
-
-      frontend matrix-federation
-        bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1
-        default_backend matrix
-
-      backend matrix
-        server matrix 127.0.0.1:8008
-
-You will also want to set ``bind_addresses: ['127.0.0.1']`` and ``x_forwarded: true``
-for port 8008 in ``homeserver.yaml`` to ensure that client IP addresses are
-recorded correctly.
-
-Having done so, you can then use ``https://matrix.example.com`` (instead of
-``https://matrix.example.com:8448``) as the "Custom server" when connecting to
-Synapse from a client.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index dd4e2d5ebd..d5a8d24c2b 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -136,8 +136,8 @@ federation_ip_range_blacklist:
 #
 # type: the type of listener. Normally 'http', but other valid options are:
 #   'manhole' (see docs/manhole.md),
-#   'metrics' (see docs/metrics-howto.rst),
-#   'replication' (see docs/workers.rst).
+#   'metrics' (see docs/metrics-howto.md),
+#   'replication' (see docs/workers.md).
 #
 # tls: set to true to enable TLS for this listener. Will use the TLS
 #   key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -172,12 +172,12 @@ federation_ip_range_blacklist:
 #
 #   media: the media API (/_matrix/media).
 #
-#   metrics: the metrics interface. See docs/metrics-howto.rst.
+#   metrics: the metrics interface. See docs/metrics-howto.md.
 #
 #   openid: OpenID authentication.
 #
 #   replication: the HTTP replication API (/_synapse/replication). See
-#   docs/workers.rst.
+#   docs/workers.md.
 #
 #   static: static resources under synapse/static (/_matrix/static). (Mostly
 #   useful for 'fallback authentication'.)
@@ -201,7 +201,7 @@ listeners:
 #   that unwraps TLS.
 #
 #   If you plan to use a reverse proxy, please see
-#   https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst.
+#   https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
 #
 - port: 8008
   tls: false
@@ -1520,7 +1520,7 @@ opentracing:
    #enabled: true

    # The list of homeservers we wish to send and receive span contexts and span baggage.
-   # See docs/opentracing.rst
+   # See docs/opentracing.md
    # This is a list of regexes which are matched against the server_name of the
    # homeserver.
    #
diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md
new file mode 100644
index 0000000000..e099d8a87b
--- /dev/null
+++ b/docs/tcp_replication.md
@@ -0,0 +1,249 @@
+# TCP Replication
+
+## Motivation
+
+Previously the workers used an HTTP long poll mechanism to get updates
+from the master, which had the problem of causing a lot of duplicate
+work on the server. This TCP protocol replaces those APIs with the aim
+of increased efficiency.
+
+## Overview
+
+The protocol is based on fire and forget, line based commands. An
+example flow would be (where '>' indicates master to worker and
+'<' worker to master flows):
+
+    > SERVER example.com
+    < REPLICATE events 53
+    > RDATA events 54 ["$foo1:bar.com", ...]
+    > RDATA events 55 ["$foo4:bar.com", ...]
+
+The example shows the server accepting a new connection and sending its
+identity with the `SERVER` command, followed by the client asking to
+subscribe to the `events` stream from the token `53`. The server then
+periodically sends `RDATA` commands which have the format
+`RDATA <stream_name> <token> <row>`, where the format of `<row>` is
+defined by the individual streams.
+
+Error reporting happens by either the client or server sending an ERROR
+command, and usually the connection will be closed.
+
+Since the protocol is a simple line-based one, it's possible to manually
+connect to the server using a tool like netcat. A few things should be
+noted when manually using the protocol:
+
+-   When subscribing to a stream using `REPLICATE`, the special token
+    `NOW` can be used to get all future updates. The special stream name
+    `ALL` can be used with `NOW` to subscribe to all available streams.
+-   The federation stream is only available if federation sending has
+    been disabled on the main process.
+-   The server will only time out connections that have sent a `PING`
+    command. If a ping is sent then the connection will be closed if no
+    further commands are received within 15s. Both the client and
+    server protocol implementations will send an initial PING on
+    connection and ensure at least one command every 5s is sent (not
+    necessarily `PING`).
+-   `RDATA` commands *usually* include a numeric token, however if the
+    stream has multiple rows to replicate per token the server will send
+    multiple `RDATA` commands, with all but the last having a token of
+    `batch`. See the documentation on `commands.RdataCommand` for
+    further details.
+
+## Architecture
+
+The basic structure of the protocol is line based, where the initial
+word of each line specifies the command. The rest of the line is parsed
+based on the command. For example, the RDATA command is defined as:
+
+    RDATA <stream_name> <token> <row>
+
+(Note that `<row>` may contain spaces, but cannot contain
+newlines.)
+
+Blank lines are ignored.
+
+### Keep alives
+
+Both sides are expected to send at least one command every 5s or so, and
+should send a `PING` command if necessary. If either side does not receive
+a command within e.g. 15s then the connection should be closed.
+
+Because the server may be connected to manually using e.g. netcat, the
+timeouts aren't enabled until an initial `PING` command is seen. Both
+the client and server implementations below send a `PING` command
+immediately on connection to ensure the timeouts are enabled.
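A minimal sketch of the keep-alive rule just described (illustrative only,
not the actual Synapse implementation):

    # Illustrative sketch of the keep-alive rule described above.
    import time

    PING_INTERVAL = 5   # send a command at least every 5s
    TIMEOUT = 15        # drop the peer if silent for 15s

    class KeepAlive:
        def __init__(self):
            self.last_sent = self.last_received = time.monotonic()
            self.received_ping = False  # timeouts only apply after a PING

        def on_command(self, line):
            self.last_received = time.monotonic()
            if line.startswith("PING"):
                self.received_ping = True

        def tick(self, send):
            # Returns True if the connection should be closed.
            now = time.monotonic()
            if now - self.last_sent > PING_INTERVAL:
                send("PING %d" % (now * 1000))
                self.last_sent = now
            # Only enforce the timeout once the peer has sent a PING,
            # so that manual netcat sessions are not disconnected.
            return self.received_ping and now - self.last_received > TIMEOUT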
+
+This keep-alive mechanism ensures that both sides can quickly realize if the
+TCP connection has gone away and handle the situation appropriately.
+
+### Start up
+
+When a new connection is made, the server:
+
+-   Sends a `SERVER` command, which includes the identity of the server,
+    allowing the client to detect if it's connected to the expected
+    server
+-   Sends a `PING` command as above, to enable the client to time out
+    connections promptly.
+
+The client:
+
+-   Sends a `NAME` command, allowing the server to associate a human
+    friendly name with the connection. This is optional.
+-   Sends a `PING` as above
+-   For each stream the client wishes to subscribe to it sends a
+    `REPLICATE` with the `stream_name` and token it wants to subscribe
+    from.
+-   On receipt of a `SERVER` command, checks that the server name
+    matches the expected server name.
+
+### Error handling
+
+If either side detects an error it can send an `ERROR` command and close
+the connection.
+
+If the client side loses the connection to the server it should
+reconnect, following the steps above.
+
+### Congestion
+
+If the server sends messages faster than the client can consume them the
+server will first buffer a (fairly large) number of commands and then
+disconnect the client. This ensures that we don't queue up an unbounded
+number of commands in memory and gives us a potential opportunity to
+squawk loudly. When/if the client recovers it can reconnect to the
+server and ask for missed messages.
+
+### Reliability
+
+In general the replication stream should be considered an unreliable
+transport since e.g. commands are not resent if the connection
+disappears.
+
+The exception to that are the replication streams, i.e. RDATA commands,
+since these include tokens which can be used to restart the stream on
+connection errors.
+
+The client should keep track of the token in the last RDATA command
+received for each stream so that on reconnection it can start streaming
+from the correct place. Note: not all RDATA have valid tokens due to
+batching. See `RdataCommand` for more details.
+
+### Example
+
+An example interaction is shown below. Each line is prefixed with '>'
+or '<' to indicate which side is sending; these are *not* included on
+the wire:
+
+    * connection established *
+    > SERVER localhost:8823
+    > PING 1490197665618
+    < NAME synapse.app.appservice
+    < PING 1490197665618
+    < REPLICATE events 1
+    < REPLICATE backfill 1
+    < REPLICATE caches 1
+    > POSITION events 1
+    > POSITION backfill 1
+    > POSITION caches 1
+    > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513]
+    > RDATA events 14 ["$149019767112vOHxz:localhost:8823",
+        "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null]
+    < PING 1490197675618
+    > ERROR server stopping
+    * connection closed by server *
+
+The `POSITION` command sent by the server is used to set the client's
+position without needing to send data with the `RDATA` command.
+
+An example of a batched set of `RDATA` is:
+
+    > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513]
+    > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513]
+    > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513]
+    > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513]
+
+In this case the client shouldn't advance their caches token until it
+sees the last `RDATA`.
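The batching rule is easy to get subtly wrong on the client side. A minimal
illustrative sketch of how a client might accumulate batched rows (the
`process_rows` callback is hypothetical, not part of Synapse):

    # Illustrative: accumulate batched RDATA rows until a real token arrives.
    import json

    pending_rows = []

    def on_rdata_line(line, process_rows):
        # line looks like: RDATA <stream_name> <token> <row>
        _cmd, stream_name, token, row = line.split(" ", 3)
        pending_rows.append(json.loads(row))
        if token == "batch":
            return  # more rows for this token are still coming
        # Only now is it safe to advance the stream position.
        process_rows(stream_name, int(token), pending_rows)
        pending_rows.clear()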
+
+### List of commands
+
+The list of valid commands, with which side can send it: server (S) or
+client (C):
+
+#### SERVER (S)
+
+   Sent at the start to identify which server the client is talking to
+
+#### RDATA (S)
+
+   A single update in a stream
+
+#### POSITION (S)
+
+   The position of the stream has been updated. Sent to the client
+   after all missing updates for a stream have been sent to the client
+   and they're now up to date.
+
+#### ERROR (S, C)
+
+   There was an error
+
+#### PING (S, C)
+
+   Sent periodically to ensure the connection is still alive
+
+#### NAME (C)
+
+   Sent at the start by client to inform the server who they are
+
+#### REPLICATE (C)
+
+   Asks the server to replicate a given stream
+
+#### USER_SYNC (C)
+
+   A user has started or stopped syncing
+
+#### FEDERATION_ACK (C)
+
+   Acknowledge receipt of some federation data
+
+#### REMOVE_PUSHER (C)
+
+   Inform the server a pusher should be removed
+
+#### INVALIDATE_CACHE (C)
+
+   Inform the server a cache should be invalidated
+
+#### SYNC (S, C)
+
+   Used exclusively in tests
+
+See `synapse/replication/tcp/commands.py` for a detailed description and
+the format of each command.
+
+### Cache Invalidation Stream
+
+The cache invalidation stream is used to inform workers when they need
+to invalidate any of their caches in the data store. This is done by
+streaming all cache invalidations done on master down to the workers,
+assuming that any caches on the workers also exist on the master.
+
+Each individual cache invalidation results in a row being sent down
+replication, which includes the cache name (the name of the function)
+and the key to invalidate. For example:
+
+    > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251]
+
+However, there are times when a number of caches need to be invalidated
+at the same time with the same key. To reduce traffic we batch those
+invalidations into a single poke by defining a special cache name that
+workers understand as an instruction to expand and invalidate the
+correct set of caches.
+
+Currently the special cache names are declared in
+`synapse/storage/_base.py` and are:
+
+1. `cs_cache_fake` ─ invalidates caches that depend on the current
+   state
diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst
deleted file mode 100644
index 75e723484c..0000000000
--- a/docs/tcp_replication.rst
+++ /dev/null
@@ -1,249 +0,0 @@
-TCP Replication
-===============
-
-Motivation
-----------
-
-Previously the workers used an HTTP long poll mechanism to get updates from the
-master, which had the problem of causing a lot of duplicate work on the server.
-This TCP protocol replaces those APIs with the aim of increased efficiency.
-
-
-
-Overview
---------
-
-The protocol is based on fire and forget, line based commands. An example flow
-would be (where '>' indicates master to worker and '<' worker to master flows)::
-
-    > SERVER example.com
-    < REPLICATE events 53
-    > RDATA events 54 ["$foo1:bar.com", ...]
-    > RDATA events 55 ["$foo4:bar.com", ...]
-
-The example shows the server accepting a new connection and sending its identity
-with the ``SERVER`` command, followed by the client asking to subscribe to the
-``events`` stream from the token ``53``. The server then periodically sends ``RDATA``
-commands which have the format ``RDATA <stream_name> <token> <row>``, where the
-format of ``<row>`` is defined by the individual streams.
-
-Error reporting happens by either the client or server sending an `ERROR`
-command, and usually the connection will be closed.
-
-
-Since the protocol is a simple line based, its possible to manually connect to
-the server using a tool like netcat. A few things should be noted when manually
-using the protocol:
-
-* When subscribing to a stream using ``REPLICATE``, the special token ``NOW`` can
-  be used to get all future updates. The special stream name ``ALL`` can be used
-  with ``NOW`` to subscribe to all available streams.
-* The federation stream is only available if federation sending has been
-  disabled on the main process.
-* The server will only time connections out that have sent a ``PING`` command.
-  If a ping is sent then the connection will be closed if no further commands
-  are receieved within 15s. Both the client and server protocol implementations
-  will send an initial PING on connection and ensure at least one command every
-  5s is sent (not necessarily ``PING``).
-* ``RDATA`` commands *usually* include a numeric token, however if the stream
-  has multiple rows to replicate per token the server will send multiple
-  ``RDATA`` commands, with all but the last having a token of ``batch``. See
-  the documentation on ``commands.RdataCommand`` for further details.
-
-
-Architecture
-------------
-
-The basic structure of the protocol is line based, where the initial word of
-each line specifies the command. The rest of the line is parsed based on the
-command. For example, the `RDATA` command is defined as::
-
-    RDATA <stream_name> <token> <row>
-
-(Note that ``<row>`` may contains spaces, but cannot contain newlines.)
-
-Blank lines are ignored.
-
-
-Keep alives
-~~~~~~~~~~~
-
-Both sides are expected to send at least one command every 5s or so, and
-should send a ``PING`` command if necessary. If either side do not receive a
-command within e.g. 15s then the connection should be closed.
-
-Because the server may be connected to manually using e.g. netcat, the timeouts
-aren't enabled until an initial ``PING`` command is seen. Both the client and
-server implementations below send a ``PING`` command immediately on connection to
-ensure the timeouts are enabled.
-
-This ensures that both sides can quickly realize if the tcp connection has gone
-and handle the situation appropriately.
-
-
-Start up
-~~~~~~~~
-
-When a new connection is made, the server:
-
-* Sends a ``SERVER`` command, which includes the identity of the server, allowing
-  the client to detect if its connected to the expected server
-* Sends a ``PING`` command as above, to enable the client to time out connections
-  promptly.
-
-The client:
-
-* Sends a ``NAME`` command, allowing the server to associate a human friendly
-  name with the connection. This is optional.
-* Sends a ``PING`` as above
-* For each stream the client wishes to subscribe to it sends a ``REPLICATE``
-  with the stream_name and token it wants to subscribe from.
-* On receipt of a ``SERVER`` command, checks that the server name matches the
-  expected server name.
-
-
-Error handling
-~~~~~~~~~~~~~~
-
-If either side detects an error it can send an ``ERROR`` command and close the
-connection.
-
-If the client side loses the connection to the server it should reconnect,
-following the steps above.
-
-
-Congestion
-~~~~~~~~~~
-
-If the server sends messages faster than the client can consume them the server
-will first buffer a (fairly large) number of commands and then disconnect the
-client. This ensures that we don't queue up an unbounded number of commands in
-memory and gives us a potential oppurtunity to squawk loudly.
When/if the client -recovers it can reconnect to the server and ask for missed messages. - - -Reliability -~~~~~~~~~~~ - -In general the replication stream should be considered an unreliable transport -since e.g. commands are not resent if the connection disappears. - -The exception to that are the replication streams, i.e. RDATA commands, since -these include tokens which can be used to restart the stream on connection -errors. - -The client should keep track of the token in the last RDATA command received -for each stream so that on reconneciton it can start streaming from the correct -place. Note: not all RDATA have valid tokens due to batching. See -``RdataCommand`` for more details. - -Example -~~~~~~~ - -An example iteraction is shown below. Each line is prefixed with '>' or '<' to -indicate which side is sending, these are *not* included on the wire:: - - * connection established * - > SERVER localhost:8823 - > PING 1490197665618 - < NAME synapse.app.appservice - < PING 1490197665618 - < REPLICATE events 1 - < REPLICATE backfill 1 - < REPLICATE caches 1 - > POSITION events 1 - > POSITION backfill 1 - > POSITION caches 1 - > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513] - > RDATA events 14 ["$149019767112vOHxz:localhost:8823", - "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null] - < PING 1490197675618 - > ERROR server stopping - * connection closed by server * - -The ``POSITION`` command sent by the server is used to set the clients position -without needing to send data with the ``RDATA`` command. - - -An example of a batched set of ``RDATA`` is:: - - > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513] - > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513] - -In this case the client shouldn't advance their caches token until it sees the -the last ``RDATA``. - - -List of commands -~~~~~~~~~~~~~~~~ - -The list of valid commands, with which side can send it: server (S) or client (C): - -SERVER (S) - Sent at the start to identify which server the client is talking to - -RDATA (S) - A single update in a stream - -POSITION (S) - The position of the stream has been updated. Sent to the client after all - missing updates for a stream have been sent to the client and they're now - up to date. - -ERROR (S, C) - There was an error - -PING (S, C) - Sent periodically to ensure the connection is still alive - -NAME (C) - Sent at the start by client to inform the server who they are - -REPLICATE (C) - Asks the server to replicate a given stream - -USER_SYNC (C) - A user has started or stopped syncing - -FEDERATION_ACK (C) - Acknowledge receipt of some federation data - -REMOVE_PUSHER (C) - Inform the server a pusher should be removed - -INVALIDATE_CACHE (C) - Inform the server a cache should be invalidated - -SYNC (S, C) - Used exclusively in tests - - -See ``synapse/replication/tcp/commands.py`` for a detailed description and the -format of each command. - - -Cache Invalidation Stream -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The cache invalidation stream is used to inform workers when they need to -invalidate any of their caches in the data store. This is done by streaming all -cache invalidations done on master down to the workers, assuming that any caches -on the workers also exist on the master. 
-
-Each individual cache invalidation results in a row being sent down replication,
-which includes the cache name (the name of the function) and they key to
-invalidate. For example::
-
-    > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251]
-
-However, there are times when a number of caches need to be invalidated at the
-same time with the same key. To reduce traffic we batch those invalidations into
-a single poke by defining a special cache name that workers understand to mean
-to expand to invalidate the correct caches.
-
-Currently the special cache names are declared in ``synapse/storage/_base.py``
-and are:
-
-1. ``cs_cache_fake`` ─ invalidates caches that depend on the current state
diff --git a/docs/turn-howto.md b/docs/turn-howto.md
new file mode 100644
index 0000000000..4a983621e5
--- /dev/null
+++ b/docs/turn-howto.md
@@ -0,0 +1,123 @@
+# Overview
+
+This document explains how to enable VoIP relaying on your Home Server with
+TURN.
+
+The synapse Matrix Home Server supports integration with TURN server via the
+[TURN server REST API](http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00). This
+allows the Home Server to generate credentials that are valid for use on the
+TURN server through the use of a secret shared between the Home Server and the
+TURN server.
+
+The following sections describe how to install [coturn](https://github.com/coturn/coturn) (which implements the TURN REST API) and integrate it with synapse.
+
+## `coturn` Setup
+
+### Initial installation
+
+The TURN daemon `coturn` is available from a variety of sources such as native package managers, or installation from source.
+
+#### Debian installation
+
+    # apt install coturn
+
+#### Source installation
+
+1. Download the [latest release](https://github.com/coturn/coturn/releases/latest) from github. Unpack it and `cd` into the directory.
+
+1. Configure it:
+
+        ./configure
+
+    > You may need to install `libevent2`: if so, you should do so in
+    > the way recommended by your operating system. You can ignore
+    > warnings about lack of database support: a database is unnecessary
+    > for this purpose.
+
+1. Build and install it:
+
+        make
+        make install
+
+1. Create or edit the config file in `/etc/turnserver.conf`. The relevant
+   lines, with example values, are:
+
+        use-auth-secret
+        static-auth-secret=[your secret key here]
+        realm=turn.myserver.org
+
+   See `turnserver.conf` for explanations of the options. One way to generate
+   the `static-auth-secret` is with `pwgen`:
+
+        pwgen -s 64 1
+
+1. Consider your security settings. TURN lets users request a relay which will
+   connect to arbitrary IP addresses and ports. The following configuration is
+   suggested as a minimum starting point:
+
+        # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay.
+        no-tcp-relay
+
+        # don't let the relay ever try to connect to private IP address ranges within your network (if any)
+        # given the turn server is likely behind your firewall, remember to include any privileged public IPs too.
+        denied-peer-ip=10.0.0.0-10.255.255.255
+        denied-peer-ip=192.168.0.0-192.168.255.255
+        denied-peer-ip=172.16.0.0-172.31.255.255
+
+        # special case the turn server itself so that client->TURN->TURN->client flows work
+        allowed-peer-ip=10.0.0.1
+
+        # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS.
+        user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user.
+        total-quota=1200
+
+   Ideally coturn should refuse to relay traffic which isn't SRTP; see
+   <https://github.com/matrix-org/synapse/issues/2009>
+
+1. Ensure your firewall allows traffic into the TURN server on the ports
+   you've configured it to listen on (remember to allow both TCP and UDP TURN
+   traffic)
+
+1. If you've configured coturn to support TLS/DTLS, generate or import your
+   private key and certificate.
+
+1. Start the turn server:
+
+        bin/turnserver -o
+
+## synapse Setup
+
+Your home server configuration file needs the following extra keys:
+
+1. "`turn_uris`": This needs to be a yaml list of public-facing URIs
+   for your TURN server to be given out to your clients. Add separate
+   entries for each transport your TURN server supports.
+2. "`turn_shared_secret`": This is the secret shared between your
+   Home server and your TURN server, so you should set it to the same
+   string you used in turnserver.conf.
+3. "`turn_user_lifetime`": This is the amount of time credentials
+   generated by your Home Server are valid for (in milliseconds).
+   Shorter times offer less potential for abuse at the expense of
+   increased traffic between web clients and your home server to
+   refresh credentials. The TURN REST API specification recommends
+   one day (86400000).
+4. "`turn_allow_guests`": Whether to allow guest users to use the
+   TURN server. This is enabled by default, as otherwise VoIP will
+   not work reliably for guests. However, it does introduce a
+   security risk as it lets guests connect to arbitrary endpoints
+   without having gone through a CAPTCHA or similar to register a
+   real account.
+
+As an example, here is the relevant section of the config file for matrix.org:
+
+    turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ]
+    turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons
+    turn_user_lifetime: 86400000
+    turn_allow_guests: True
+
+After updating the homeserver configuration, you must restart synapse:
+
+    cd /where/you/run/synapse
+    ./synctl restart
+
+...and your Home Server now supports VoIP relaying!
diff --git a/docs/turn-howto.rst b/docs/turn-howto.rst
deleted file mode 100644
index a2fc5c8820..0000000000
--- a/docs/turn-howto.rst
+++ /dev/null
@@ -1,127 +0,0 @@
-How to enable VoIP relaying on your Home Server with TURN
-
-Overview
---------
-The synapse Matrix Home Server supports integration with TURN server via the
-TURN server REST API
-(http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00). This allows
-the Home Server to generate credentials that are valid for use on the TURN
-server through the use of a secret shared between the Home Server and the
-TURN server.
-
-This document describes how to install coturn
-(https://github.com/coturn/coturn) which also supports the TURN REST API,
-and integrate it with synapse.
-
-coturn Setup
-============
-
-You may be able to setup coturn via your package manager, or set it up manually using the usual ``configure, make, make install`` process.
-
- 1. Check out coturn::
-
-      git clone https://github.com/coturn/coturn.git coturn
-      cd coturn
-
- 2. Configure it::
-
-      ./configure
-
-    You may need to install ``libevent2``: if so, you should do so
-    in the way recommended by your operating system.
-    You can ignore warnings about lack of database support: a
-    database is unnecessary for this purpose.
-
- 3. Build and install it::
-
-      make
-      make install
-
- 4. Create or edit the config file in ``/etc/turnserver.conf``. The relevant
-    lines, with example values, are::
-
-      use-auth-secret
-      static-auth-secret=[your secret key here]
-      realm=turn.myserver.org
-
-    See turnserver.conf for explanations of the options.
- One way to generate the static-auth-secret is with pwgen:: - - pwgen -s 64 1 - - 5. Consider your security settings. TURN lets users request a relay - which will connect to arbitrary IP addresses and ports. At the least - we recommend:: - - # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay. - no-tcp-relay - - # don't let the relay ever try to connect to private IP address ranges within your network (if any) - # given the turn server is likely behind your firewall, remember to include any privileged public IPs too. - denied-peer-ip=10.0.0.0-10.255.255.255 - denied-peer-ip=192.168.0.0-192.168.255.255 - denied-peer-ip=172.16.0.0-172.31.255.255 - - # special case the turn server itself so that client->TURN->TURN->client flows work - allowed-peer-ip=10.0.0.1 - - # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS. - user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user. - total-quota=1200 - - Ideally coturn should refuse to relay traffic which isn't SRTP; - see https://github.com/matrix-org/synapse/issues/2009 - - 6. Ensure your firewall allows traffic into the TURN server on - the ports you've configured it to listen on (remember to allow - both TCP and UDP TURN traffic) - - 7. If you've configured coturn to support TLS/DTLS, generate or - import your private key and certificate. - - 8. Start the turn server:: - - bin/turnserver -o - - -synapse Setup -============= - -Your home server configuration file needs the following extra keys: - - 1. "turn_uris": This needs to be a yaml list - of public-facing URIs for your TURN server to be given out - to your clients. Add separate entries for each transport your - TURN server supports. - - 2. "turn_shared_secret": This is the secret shared between your Home - server and your TURN server, so you should set it to the same - string you used in turnserver.conf. - - 3. "turn_user_lifetime": This is the amount of time credentials - generated by your Home Server are valid for (in milliseconds). - Shorter times offer less potential for abuse at the expense - of increased traffic between web clients and your home server - to refresh credentials. The TURN REST API specification recommends - one day (86400000). - - 4. "turn_allow_guests": Whether to allow guest users to use the TURN - server. This is enabled by default, as otherwise VoIP will not - work reliably for guests. However, it does introduce a security risk - as it lets guests connect to arbitrary endpoints without having gone - through a CAPTCHA or similar to register a real account. - -As an example, here is the relevant section of the config file for -matrix.org:: - - turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ] - turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons - turn_user_lifetime: 86400000 - turn_allow_guests: True - -Now, restart synapse:: - - cd /where/you/run/synapse - ./synctl restart - -...and your Home Server now supports VoIP relaying! diff --git a/docs/workers.md b/docs/workers.md new file mode 100644 index 0000000000..4bd60ba0a0 --- /dev/null +++ b/docs/workers.md @@ -0,0 +1,284 @@ +# Scaling synapse via workers + +Synapse has experimental support for splitting out functionality into +multiple separate python processes, helping greatly with scalability. 
These
+processes are called 'workers', and are (eventually) intended to scale
+horizontally independently.
+
+All of the below is highly experimental and subject to change as Synapse evolves,
+but it is documented here to help folks needing highly scalable Synapses similar
+to the one running matrix.org!
+
+All processes continue to share the same database instance, and as such, workers
+only work with postgres based synapse deployments (sharing a single sqlite
+across multiple processes is a recipe for disaster, plus you should be using
+postgres anyway if you care about scalability).
+
+The workers communicate with the master synapse process via a synapse-specific
+TCP protocol called 'replication' - analogous to MySQL or Postgres style
+database replication; feeding a stream of relevant data to the workers so they
+can be kept in sync with the main synapse process and database state.
+
+## Configuration
+
+To make effective use of the workers, you will need to configure an HTTP
+reverse-proxy such as nginx or haproxy, which will direct incoming requests to
+the correct worker, or to the main synapse instance. Note that this includes
+requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md)
+for information on setting up a reverse proxy.
+
+To enable workers, you need to add two replication listeners to the master
+synapse, e.g.:
+
+    listeners:
+      # The TCP replication port
+      - port: 9092
+        bind_address: '127.0.0.1'
+        type: replication
+      # The HTTP replication port
+      - port: 9093
+        bind_address: '127.0.0.1'
+        type: http
+        resources:
+          - names: [replication]
+
+Under **no circumstances** should these replication API listeners be exposed to
+the public internet; they currently implement no authentication whatsoever and are
+unencrypted.
+
+(Roughly, the TCP port is used for streaming data from the master to the
+workers, and the HTTP port for the workers to send data to the main
+synapse process.)
+
+You then create a set of configs for the various worker processes. These
+should be worker configuration files, and should be stored in a dedicated
+subdirectory, to allow synctl to manipulate them. An additional configuration
+for the master synapse process will need to be created because the process will
+not be started automatically. That configuration should look like this:
+
+    worker_app: synapse.app.homeserver
+    daemonize: true
+
+Each worker configuration file inherits the configuration of the main homeserver
+configuration file. You can then override configuration specific to that worker,
+e.g. the HTTP listener that it provides (if any); logging configuration; etc.
+You should minimise the number of overrides though to maintain a usable config.
+
+You must specify the type of worker application (`worker_app`). The currently
+available worker applications are listed below. You must also specify the
+replication endpoints that it's talking to on the main synapse process.
+`worker_replication_host` should specify the host of the main synapse,
+`worker_replication_port` should point to the TCP replication listener port and
+`worker_replication_http_port` should point to the HTTP replication port.
+
+Currently, the `event_creator` and `federation_reader` workers require specifying
+`worker_replication_http_port`.
+
+For instance:
+
+    worker_app: synapse.app.synchrotron
+
+    # The replication listener on the synapse to talk to.
+ worker_replication_host: 127.0.0.1 + worker_replication_port: 9092 + worker_replication_http_port: 9093 + + worker_listeners: + - type: http + port: 8083 + resources: + - names: + - client + + worker_daemonize: True + worker_pid_file: /home/matrix/synapse/synchrotron.pid + worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml + +...is a full configuration for a synchrotron worker instance, which will expose a +plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided +by the main synapse. + +Obviously you should configure your reverse-proxy to route the relevant +endpoints to the worker (`localhost:8083` in the above example). + +Finally, to actually run your worker-based synapse, you must pass synctl the -a +commandline option to tell it to operate on all the worker configurations found +in the given directory, e.g.: + + synctl -a $CONFIG/workers start + +Currently one should always restart all workers when restarting or upgrading +synapse, unless you explicitly know it's safe not to. For instance, restarting +synapse without restarting all the synchrotrons may result in broken typing +notifications. + +To manipulate a specific worker, you pass the -w option to synctl: + + synctl -w $CONFIG/workers/synchrotron.yaml restart + +## Available worker applications + +### `synapse.app.pusher` + +Handles sending push notifications to sygnal and email. Doesn't handle any +REST endpoints itself, but you should set `start_pushers: False` in the +shared configuration file to stop the main synapse sending these notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.synchrotron` + +The synchrotron handles `sync` requests from clients. In particular, it can +handle REST endpoints matching the following regular expressions: + + ^/_matrix/client/(v2_alpha|r0)/sync$ + ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ + ^/_matrix/client/(api/v1|r0)/initialSync$ + ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ + +The above endpoints should all be routed to the synchrotron worker by the +reverse-proxy configuration. + +It is possible to run multiple instances of the synchrotron to scale +horizontally. In this case the reverse-proxy should be configured to +load-balance across the instances, though it will be more efficient if all +requests from a particular user are routed to a single instance. Extracting +a userid from the access token is currently left as an exercise for the reader. + +### `synapse.app.appservice` + +Handles sending output traffic to Application Services. Doesn't handle any +REST endpoints itself, but you should set `notify_appservices: False` in the +shared configuration file to stop the main synapse sending these notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.federation_reader` + +Handles a subset of federation endpoints. 
In particular, it can handle REST
+endpoints matching the following regular expressions:
+
+    ^/_matrix/federation/v1/event/
+    ^/_matrix/federation/v1/state/
+    ^/_matrix/federation/v1/state_ids/
+    ^/_matrix/federation/v1/backfill/
+    ^/_matrix/federation/v1/get_missing_events/
+    ^/_matrix/federation/v1/publicRooms
+    ^/_matrix/federation/v1/query/
+    ^/_matrix/federation/v1/make_join/
+    ^/_matrix/federation/v1/make_leave/
+    ^/_matrix/federation/v1/send_join/
+    ^/_matrix/federation/v1/send_leave/
+    ^/_matrix/federation/v1/invite/
+    ^/_matrix/federation/v1/query_auth/
+    ^/_matrix/federation/v1/event_auth/
+    ^/_matrix/federation/v1/exchange_third_party_invite/
+    ^/_matrix/federation/v1/send/
+    ^/_matrix/key/v2/query
+
+The above endpoints should all be routed to the federation_reader worker by the
+reverse-proxy configuration.
+
+The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single
+instance.
+
+### `synapse.app.federation_sender`
+
+Handles sending federation traffic to other servers. Doesn't handle any
+REST endpoints itself, but you should set `send_federation: False` in the
+shared configuration file to stop the main synapse sending this traffic.
+
+Note this worker cannot be load-balanced: only one instance should be active.
+
+### `synapse.app.media_repository`
+
+Handles the media repository. It can handle all endpoints starting with:
+
+    /_matrix/media/
+
+And the following regular expressions matching media-specific administration APIs:
+
+    ^/_synapse/admin/v1/purge_media_cache$
+    ^/_synapse/admin/v1/room/.*/media$
+    ^/_synapse/admin/v1/quarantine_media/.*$
+
+You should also set `enable_media_repo: False` in the shared configuration
+file to stop the main synapse running background jobs related to managing the
+media repository.
+
+Note this worker cannot be load-balanced: only one instance should be active.
+
+### `synapse.app.client_reader`
+
+Handles client API endpoints. It can handle REST endpoints matching the
+following regular expressions:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$
+    ^/_matrix/client/(api/v1|r0|unstable)/login$
+    ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$
+    ^/_matrix/client/(api/v1|r0|unstable)/keys/query$
+    ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$
+    ^/_matrix/client/versions$
+    ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$
+
+Additionally, the following REST endpoints can be handled for GET requests:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$
+
+Additionally, the following REST endpoints can be handled, but all requests must
+be routed to the same instance:
+
+    ^/_matrix/client/(r0|unstable)/register$
+
+Pagination requests can also be handled, but all requests for a given room
+must be routed to the same instance. Additionally, care must be taken to
+ensure that the purge history admin API is not used while pagination requests
+for the room are in flight:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$
+
+### `synapse.app.user_dir`
+
+Handles searches in the user directory.
It can handle REST endpoints matching
+the following regular expressions:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$
+
+### `synapse.app.frontend_proxy`
+
+Proxies some frequently-requested client endpoints to add caching and remove
+load from the main synapse. It can handle REST endpoints matching the following
+regular expressions:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/keys/upload
+
+If `use_presence` is False in the homeserver config, it can also handle REST
+endpoints matching the following regular expressions:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status
+
+This "stub" presence handler will pass through `GET` requests but make the
+`PUT` effectively a no-op.
+
+It will proxy any requests it cannot handle to the main synapse instance. It
+must therefore be configured with the location of the main instance, via
+the `worker_main_http_uri` setting in the `frontend_proxy` worker configuration
+file. For example:
+
+    worker_main_http_uri: http://127.0.0.1:8008
+
+### `synapse.app.event_creator`
+
+Handles some event creation. It can handle REST endpoints matching:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
+    ^/_matrix/client/(api/v1|r0|unstable)/join/
+    ^/_matrix/client/(api/v1|r0|unstable)/profile/
+
+It will create events locally and then send them on to the main synapse
+instance to be persisted and handled.
diff --git a/docs/workers.rst b/docs/workers.rst
deleted file mode 100644
index e11e117418..0000000000
--- a/docs/workers.rst
+++ /dev/null
@@ -1,301 +0,0 @@
-Scaling synapse via workers
-===========================
-
-Synapse has experimental support for splitting out functionality into
-multiple separate python processes, helping greatly with scalability. These
-processes are called 'workers', and are (eventually) intended to scale
-horizontally independently.
-
-All of the below is highly experimental and subject to change as Synapse evolves,
-but documenting it here to help folks needing highly scalable Synapses similar
-to the one running matrix.org!
-
-All processes continue to share the same database instance, and as such, workers
-only work with postgres based synapse deployments (sharing a single sqlite
-across multiple processes is a recipe for disaster, plus you should be using
-postgres anyway if you care about scalability).
-
-The workers communicate with the master synapse process via a synapse-specific
-TCP protocol called 'replication' - analogous to MySQL or Postgres style
-database replication; feeding a stream of relevant data to the workers so they
-can be kept in sync with the main synapse process and database state.
-
-Configuration
--------------
-
-To make effective use of the workers, you will need to configure an HTTP
-reverse-proxy such as nginx or haproxy, which will direct incoming requests to
-the correct worker, or to the main synapse instance. Note that this includes
-requests made to the federation port. See `<reverse_proxy.rst>`_ for
-information on setting up a reverse proxy.
- -To enable workers, you need to add two replication listeners to the master -synapse, e.g.:: - - listeners: - # The TCP replication port - - port: 9092 - bind_address: '127.0.0.1' - type: replication - # The HTTP replication port - - port: 9093 - bind_address: '127.0.0.1' - type: http - resources: - - names: [replication] - -Under **no circumstances** should these replication API listeners be exposed to -the public internet; it currently implements no authentication whatsoever and is -unencrypted. - -(Roughly, the TCP port is used for streaming data from the master to the -workers, and the HTTP port for the workers to send data to the main -synapse process.) - -You then create a set of configs for the various worker processes. These -should be worker configuration files, and should be stored in a dedicated -subdirectory, to allow synctl to manipulate them. An additional configuration -for the master synapse process will need to be created because the process will -not be started automatically. That configuration should look like this:: - - worker_app: synapse.app.homeserver - daemonize: true - -Each worker configuration file inherits the configuration of the main homeserver -configuration file. You can then override configuration specific to that worker, -e.g. the HTTP listener that it provides (if any); logging configuration; etc. -You should minimise the number of overrides though to maintain a usable config. - -You must specify the type of worker application (``worker_app``). The currently -available worker applications are listed below. You must also specify the -replication endpoints that it's talking to on the main synapse process. -``worker_replication_host`` should specify the host of the main synapse, -``worker_replication_port`` should point to the TCP replication listener port and -``worker_replication_http_port`` should point to the HTTP replication port. - -Currently, the ``event_creator`` and ``federation_reader`` workers require specifying -``worker_replication_http_port``. - -For instance:: - - worker_app: synapse.app.synchrotron - - # The replication listener on the synapse to talk to. - worker_replication_host: 127.0.0.1 - worker_replication_port: 9092 - worker_replication_http_port: 9093 - - worker_listeners: - - type: http - port: 8083 - resources: - - names: - - client - - worker_daemonize: True - worker_pid_file: /home/matrix/synapse/synchrotron.pid - worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml - -...is a full configuration for a synchrotron worker instance, which will expose a -plain HTTP ``/sync`` endpoint on port 8083 separately from the ``/sync`` endpoint provided -by the main synapse. - -Obviously you should configure your reverse-proxy to route the relevant -endpoints to the worker (``localhost:8083`` in the above example). - -Finally, to actually run your worker-based synapse, you must pass synctl the -a -commandline option to tell it to operate on all the worker configurations found -in the given directory, e.g.:: - - synctl -a $CONFIG/workers start - -Currently one should always restart all workers when restarting or upgrading -synapse, unless you explicitly know it's safe not to. For instance, restarting -synapse without restarting all the synchrotrons may result in broken typing -notifications. 
- -To manipulate a specific worker, you pass the -w option to synctl:: - - synctl -w $CONFIG/workers/synchrotron.yaml restart - - -Available worker applications ------------------------------ - -``synapse.app.pusher`` -~~~~~~~~~~~~~~~~~~~~~~ - -Handles sending push notifications to sygnal and email. Doesn't handle any -REST endpoints itself, but you should set ``start_pushers: False`` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.synchrotron`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The synchrotron handles ``sync`` requests from clients. In particular, it can -handle REST endpoints matching the following regular expressions:: - - ^/_matrix/client/(v2_alpha|r0)/sync$ - ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ - ^/_matrix/client/(api/v1|r0)/initialSync$ - ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ - -The above endpoints should all be routed to the synchrotron worker by the -reverse-proxy configuration. - -It is possible to run multiple instances of the synchrotron to scale -horizontally. In this case the reverse-proxy should be configured to -load-balance across the instances, though it will be more efficient if all -requests from a particular user are routed to a single instance. Extracting -a userid from the access token is currently left as an exercise for the reader. - -``synapse.app.appservice`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles sending output traffic to Application Services. Doesn't handle any -REST endpoints itself, but you should set ``notify_appservices: False`` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.federation_reader`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles a subset of federation endpoints. In particular, it can handle REST -endpoints matching the following regular expressions:: - - ^/_matrix/federation/v1/event/ - ^/_matrix/federation/v1/state/ - ^/_matrix/federation/v1/state_ids/ - ^/_matrix/federation/v1/backfill/ - ^/_matrix/federation/v1/get_missing_events/ - ^/_matrix/federation/v1/publicRooms - ^/_matrix/federation/v1/query/ - ^/_matrix/federation/v1/make_join/ - ^/_matrix/federation/v1/make_leave/ - ^/_matrix/federation/v1/send_join/ - ^/_matrix/federation/v1/send_leave/ - ^/_matrix/federation/v1/invite/ - ^/_matrix/federation/v1/query_auth/ - ^/_matrix/federation/v1/event_auth/ - ^/_matrix/federation/v1/exchange_third_party_invite/ - ^/_matrix/federation/v1/send/ - ^/_matrix/key/v2/query - -The above endpoints should all be routed to the federation_reader worker by the -reverse-proxy configuration. - -The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single -instance. - -``synapse.app.federation_sender`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles sending federation traffic to other servers. Doesn't handle any -REST endpoints itself, but you should set ``send_federation: False`` in the -shared configuration file to stop the main synapse sending this traffic. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.media_repository`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles the media repository. 
It can handle all endpoints starting with:: - - /_matrix/media/ - -And the following regular expressions matching media-specific administration -APIs:: - - ^/_synapse/admin/v1/purge_media_cache$ - ^/_synapse/admin/v1/room/.*/media$ - ^/_synapse/admin/v1/quarantine_media/.*$ - -You should also set ``enable_media_repo: False`` in the shared configuration -file to stop the main synapse running background jobs related to managing the -media repository. - -Note this worker cannot be load-balanced: only one instance should be active. - -``synapse.app.client_reader`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles client API endpoints. It can handle REST endpoints matching the -following regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ - ^/_matrix/client/(api/v1|r0|unstable)/login$ - ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ - ^/_matrix/client/versions$ - ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ - -Additionally, the following REST endpoints can be handled for GET requests:: - - ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ - -Additionally, the following REST endpoints can be handled, but all requests must -be routed to the same instance:: - - ^/_matrix/client/(r0|unstable)/register$ - -Pagination requests can also be handled, but all requests with the same path -room must be routed to the same instance. Additionally, care must be taken to -ensure that the purge history admin API is not used while pagination requests -for the room are in flight:: - - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ - - -``synapse.app.user_dir`` -~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles searches in the user directory. It can handle REST endpoints matching -the following regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$ - -``synapse.app.frontend_proxy`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Proxies some frequently-requested client endpoints to add caching and remove -load from the main synapse. It can handle REST endpoints matching the following -regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/keys/upload - -If ``use_presence`` is False in the homeserver config, it can also handle REST -endpoints matching the following regular expressions:: - - ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status - -This "stub" presence handler will pass through ``GET`` request but make the -``PUT`` effectively a no-op. - -It will proxy any requests it cannot handle to the main synapse instance. It -must therefore be configured with the location of the main instance, via -the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration -file. For example:: - - worker_main_http_uri: http://127.0.0.1:8008 - - -``synapse.app.event_creator`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles some event creation. 
It can handle REST endpoints matching::
-
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
-    ^/_matrix/client/(api/v1|r0|unstable)/join/
-    ^/_matrix/client/(api/v1|r0|unstable)/profile/
-
-It will create events locally and then send them on to the main synapse
-instance to be persisted and handled.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index c8b9fe2d0f..7f8d315954 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -338,7 +338,7 @@ class ServerConfig(Config):
             (
                 "The metrics_port configuration option is deprecated in Synapse 0.31 "
                 "in favour of a listener. Please see "
-                "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.rst"
+                "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md"
                 " on how to configure the new listener."
             )
         )
@@ -571,8 +571,8 @@ class ServerConfig(Config):
        #
        # type: the type of listener. Normally 'http', but other valid options are:
        #   'manhole' (see docs/manhole.md),
-        #   'metrics' (see docs/metrics-howto.rst),
-        #   'replication' (see docs/workers.rst).
+        #   'metrics' (see docs/metrics-howto.md),
+        #   'replication' (see docs/workers.md).
        #
        # tls: set to true to enable TLS for this listener. Will use the TLS
        #   key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -607,12 +607,12 @@ class ServerConfig(Config):
        #
        #   media: the media API (/_matrix/media).
        #
-        #   metrics: the metrics interface. See docs/metrics-howto.rst.
+        #   metrics: the metrics interface. See docs/metrics-howto.md.
        #
        #   openid: OpenID authentication.
        #
        #   replication: the HTTP replication API (/_synapse/replication). See
-        #   docs/workers.rst.
+        #   docs/workers.md.
        #
        #   static: static resources under synapse/static (/_matrix/static). (Mostly
        #   useful for 'fallback authentication'.)
@@ -632,7 +632,7 @@ class ServerConfig(Config):
        #   that unwraps TLS.
        #
        #   If you plan to use a reverse proxy, please see
-        #   https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst.
+        #   https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
        #
        %(unsecure_http_bindings)s
-- 
cgit 1.4.1


From 6670bd407201f331353a4d402369da75b61ceca9 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 17 Sep 2019 18:05:13 +0100
Subject: v2 3PID Invites (part of MSC2140) (#5979)

3PID invites require making a request to an identity server to check that the
invited 3PID has a Matrix ID linked, and if so, what it is.

These requests are being made on behalf of a user. The user will supply an
identity server and an access token for that identity server. The homeserver
will then forward this request with the access token (using an `Authorization`
header) and, if the given identity server doesn't support v2 endpoints, will
fall back to v1 (which doesn't require any access tokens).

Requires: ~~#5976~~
---
 changelog.d/5979.feature        |   1 +
 synapse/handlers/room_member.py | 104 +++++++++++++++++++++++++++++++---------
 2 files changed, 82 insertions(+), 23 deletions(-)
 create mode 100644 changelog.d/5979.feature

diff --git a/changelog.d/5979.feature b/changelog.d/5979.feature
new file mode 100644
index 0000000000..94888aa2d3
--- /dev/null
+++ b/changelog.d/5979.feature
@@ -0,0 +1 @@
+Use the v2 Identity Service API for 3PID invites.
\ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 43d10a5308..35450feb6f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -684,7 +684,14 @@ class RoomMemberHandler(object): ) else: yield self._make_and_store_3pid_invite( - requester, id_server, medium, address, room_id, inviter, txn_id=txn_id + requester, + id_server, + medium, + address, + room_id, + inviter, + txn_id=txn_id, + id_access_token=id_access_token, ) @defer.inlineCallbacks @@ -885,7 +892,15 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _make_and_store_3pid_invite( - self, requester, id_server, medium, address, room_id, user, txn_id + self, + requester, + id_server, + medium, + address, + room_id, + user, + txn_id, + id_access_token=None, ): room_state = yield self.state_handler.get_current_state(room_id) @@ -934,6 +949,7 @@ class RoomMemberHandler(object): room_name=room_name, inviter_display_name=inviter_display_name, inviter_avatar_url=inviter_avatar_url, + id_access_token=id_access_token, ) ) @@ -971,6 +987,7 @@ class RoomMemberHandler(object): room_name, inviter_display_name, inviter_avatar_url, + id_access_token=None, ): """ Asks an identity server for a third party invite. @@ -990,6 +1007,8 @@ class RoomMemberHandler(object): inviter_display_name (str): The current display name of the inviter. inviter_avatar_url (str): The URL of the inviter's avatar. + id_access_token (str|None): The access token to authenticate to the identity + server with Returns: A deferred tuple containing: @@ -1000,11 +1019,6 @@ class RoomMemberHandler(object): display_name (str): A user-friendly name to represent the invited user. """ - is_url = "%s%s/_matrix/identity/api/v1/store-invite" % ( - id_server_scheme, - id_server, - ) - invite_config = { "medium": medium, "address": address, @@ -1017,22 +1031,67 @@ class RoomMemberHandler(object): "sender_display_name": inviter_display_name, "sender_avatar_url": inviter_avatar_url, } - try: - data = yield self.simple_http_client.post_json_get_json( - is_url, invite_config - ) - except HttpResponseException as e: - # Some identity servers may only support application/x-www-form-urlencoded - # types. 
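Stripped of the Twisted machinery, the try-v2-then-fall-back flow this commit implements amounts to the following standalone sketch (a hypothetical example using `requests`, not Synapse's code; `id_server` and the token are placeholders, and the urlencoded fallback visible in the diff is omitted for brevity):

    import requests

    def store_invite(id_server, invite_config, id_access_token=None):
        base = "https://%s/_matrix/identity" % id_server
        if id_access_token:
            # v2 requires authentication: the token goes in an
            # Authorization header as a Bearer credential.
            resp = requests.post(
                base + "/v2/store-invite",
                json=invite_config,
                headers={"Authorization": "Bearer %s" % id_access_token},
            )
            if resp.status_code != 404:
                resp.raise_for_status()
                return resp.json()
        # The identity server has no v2 API (or no token was supplied),
        # so fall back to the unauthenticated v1 endpoint.
        resp = requests.post(base + "/api/v1/store-invite", json=invite_config)
        resp.raise_for_status()
        return resp.json()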
From 7100b5cc9de1620945eeba3d2725bd4239c4a3c6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Wed, 18 Sep 2019 10:16:00 +0100
Subject: fix sample config

this was apparently broken by #6040.
---
 docs/sample_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index d5a8d24c2b..1ee0ba8c30 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1520,7 +1520,7 @@ opentracing:
     #enabled: true
 
    # The list of homeservers we wish to send and receive span contexts and span baggage.
-    # See docs/opentracing.md
+    # See docs/opentracing.rst
    # This is a list of regexes which are matched against the server_name of the
    # homeserver.
    #
-- 
cgit 1.4.1


From a86a290850dad40c1ac38c4e20b2da039f246922 Mon Sep 17 00:00:00 2001
From: "J. Ryan Stinnett"
Date: Wed, 18 Sep 2019 21:55:37 +0100
Subject: Fix logcontext spam on non-Linux platforms (#6059)

This checks whether the current platform supports thread resource usage
tracking before logging a warning to avoid log spam.

Fixes https://github.com/matrix-org/synapse/issues/6055
---
 changelog.d/6059.bugfix    |  1 +
 synapse/logging/context.py | 13 +++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6059.bugfix

diff --git a/changelog.d/6059.bugfix b/changelog.d/6059.bugfix
new file mode 100644
index 0000000000..49d5bd3fa0
--- /dev/null
+++ b/changelog.d/6059.bugfix
@@ -0,0 +1 @@
+Fix logcontext spam on non-Linux platforms.

diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 63379bfb93..370000e377 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -1,4 +1,5 @@
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -42,13 +43,17 @@ try:
     # exception.
     resource.getrusage(RUSAGE_THREAD)
 
+    is_thread_resource_usage_supported = True
+
     def get_thread_resource_usage():
         return resource.getrusage(RUSAGE_THREAD)
 
 
 except Exception:
     # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we
-    # won't track resource usage by returning None.
+    # won't track resource usage.
+    is_thread_resource_usage_supported = False
+
     def get_thread_resource_usage():
         return None
@@ -359,7 +364,11 @@ class LoggingContext(object):
         # When we stop, let's record the cpu used since we started
         if not self.usage_start:
-            logger.warning("Called stop on logcontext %s without calling start", self)
+            # Log a warning on platforms that support thread usage tracking
+            if is_thread_resource_usage_supported:
+                logger.warning(
+                    "Called stop on logcontext %s without calling start", self
+                )
             return
 
         utime_delta, stime_delta = self._get_cputime()
-- 
cgit 1.4.1
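The fix rests on a probe-once-at-import pattern: try the Linux-only RUSAGE_THREAD counter a single time, remember whether it worked, and branch on that flag later instead of failing repeatedly. In isolation the pattern looks like this (a sketch, not the Synapse module itself):

    try:
        import resource  # absent entirely on some platforms, e.g. Windows

        # RUSAGE_THREAD is Linux-specific; probing it once at import time
        # tells us whether per-thread CPU accounting is available here.
        resource.getrusage(resource.RUSAGE_THREAD)
        is_thread_resource_usage_supported = True
    except Exception:
        resource = None
        is_thread_resource_usage_supported = False

    def get_thread_resource_usage():
        if is_thread_resource_usage_supported:
            return resource.getrusage(resource.RUSAGE_THREAD)
        return None

    print("thread rusage supported:", is_thread_resource_usage_supported)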
From d58cad635e27f58bc4823ba5b0a1bb69aa8b8bbc Mon Sep 17 00:00:00 2001
From: Jorik Schellekens
Date: Wed, 18 Sep 2019 22:27:59 +0100
Subject: Give appropriate exit codes when synctl fails (#5992)

---
 changelog.d/5992.feature |  1 +
 synctl                   | 43 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/5992.feature

diff --git a/changelog.d/5992.feature b/changelog.d/5992.feature
new file mode 100644
index 0000000000..31866c2925
--- /dev/null
+++ b/changelog.d/5992.feature
@@ -0,0 +1 @@
+Give appropriate exit codes when synctl fails.

diff --git a/synctl b/synctl
index a9629cf0e8..45acece30b 100755
--- a/synctl
+++ b/synctl
@@ -71,7 +71,20 @@ def abort(message, colour=RED, stream=sys.stderr):
     sys.exit(1)
 
 
-def start(configfile, daemonize=True):
+def start(configfile: str, daemonize: bool = True) -> bool:
+    """Attempts to start synapse.
+    Args:
+        configfile: path to a yaml synapse config file
+        daemonize: whether to daemonize synapse or keep it attached to the current
+            session
+
+    Returns:
+        True if the process started successfully
+        False if there was an error starting the process
+
+    If daemonize is False it will only return once synapse exits.
+    """
+
     write("Starting ...")
     args = SYNAPSE
 
@@ -83,25 +96,40 @@ def start(configfile, daemonize=True):
     try:
         subprocess.check_call(args)
         write("started synapse.app.homeserver(%r)" % (configfile,), colour=GREEN)
+        return True
     except subprocess.CalledProcessError as e:
         write(
             "error starting (exit code: %d); see above for logs" % e.returncode,
             colour=RED,
         )
+        return False
+
+
+def start_worker(app: str, configfile: str, worker_configfile: str) -> bool:
+    """Attempts to start a synapse worker.
+    Args:
+        app: name of the worker's app module
+        configfile: path to a yaml synapse config file
+        worker_configfile: path to worker specific yaml synapse file
+
+    Returns:
+        True if the process started successfully
+        False if there was an error starting the process
+    """
 
-def start_worker(app, configfile, worker_configfile):
     args = [sys.executable, "-B", "-m", app, "-c", configfile, "-c", worker_configfile]
 
     try:
         subprocess.check_call(args)
         write("started %s(%r)" % (app, worker_configfile), colour=GREEN)
+        return True
     except subprocess.CalledProcessError as e:
         write(
             "error starting %s(%r) (exit code: %d); see above for logs"
             % (app, worker_configfile, e.returncode),
             colour=RED,
         )
+        return False
 
 
 def stop(pidfile, app):
@@ -292,11 +320,14 @@ def main():
             write("All processes exited; now restarting...")
 
     if action == "start" or action == "restart":
+        error = False
         if start_stop_synapse:
             # Check if synapse is already running
             if os.path.exists(pidfile) and pid_running(int(open(pidfile).read())):
                 abort("synapse.app.homeserver already running")
-            start(configfile, bool(options.daemonize))
+
+            if not start(configfile, bool(options.daemonize)):
+                error = True
 
         for worker in workers:
             env = os.environ.copy()
@@ -307,12 +338,16 @@ def main():
             for cache_name, factor in iteritems(worker.cache_factors):
                 os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)
 
-            start_worker(worker.app, configfile, worker.configfile)
+            if not start_worker(worker.app, configfile, worker.configfile):
+                error = True
 
             # Reset env back to the original
             os.environ.clear()
             os.environ.update(env)
 
+    if error:
+        exit(1)
+
 
 if __name__ == "__main__":
     main()
-- 
cgit 1.4.1
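The shape of this change is a common one: each start helper reports success as a boolean instead of swallowing the failure, the loop keeps starting the remaining processes, and the accumulated result decides the process exit code. Reduced to a skeleton (illustrative placeholder commands, not the real synctl invocation):

    import subprocess
    import sys

    def run(args):
        # Report success rather than raising, so one failure doesn't
        # prevent the remaining commands from being attempted.
        try:
            subprocess.check_call(args)
            return True
        except subprocess.CalledProcessError:
            return False

    commands = [["true"], ["false"], ["true"]]  # placeholders
    error = False
    for args in commands:
        if not run(args):
            error = True  # remember the failure, keep going

    if error:
        sys.exit(1)  # a non-zero exit code tells the caller something failed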
From 38fd1f8e3faeffbd4bb3084012bb2c17a953625f Mon Sep 17 00:00:00 2001
From: Jorik Schellekens
Date: Wed, 18 Sep 2019 22:30:44 +0100
Subject: Fix typo in account_threepid_delegates config (#6028)

---
 changelog.d/6028.feature       | 1 +
 docs/sample_config.yaml        | 2 +-
 synapse/config/registration.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6028.feature

diff --git a/changelog.d/6028.feature b/changelog.d/6028.feature
new file mode 100644
index 0000000000..cf603fa0c6
--- /dev/null
+++ b/changelog.d/6028.feature
@@ -0,0 +1 @@
+Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`.

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 1ee0ba8c30..3e4edc6b0b 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -938,7 +938,7 @@ uploads_path: "DATADIR/uploads"
 #       https://matrix.org/docs/spec/identity_service/latest
 #
 account_threepid_delegates:
-    #email: https://example.com     # Delegate email sending to matrix.org
+    #email: https://example.com     # Delegate email sending to example.org
     #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
 
 # Users who register on this homeserver will automatically be joined

diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index 9548560edb..d4654e99b3 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -294,7 +294,7 @@ class RegistrationConfig(Config):
         #       https://matrix.org/docs/spec/identity_service/latest
         #
         account_threepid_delegates:
-            #email: https://example.com     # Delegate email sending to matrix.org
+            #email: https://example.com     # Delegate email sending to example.org
             #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
 
         # Users who register on this homeserver will automatically be joined
-- 
cgit 1.4.1
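For context on the option whose comment is being corrected: `account_threepid_delegates` names an external service that performs the actual email or SMS validation, and the homeserver essentially forwards the client's `requestToken` call to it. A very rough sketch of that delegation (the endpoint path follows the v1 Identity Service API; this is not Synapse's implementation, and `delegate_base_url` is a placeholder):

    import requests

    def request_email_token(delegate_base_url, email, client_secret, send_attempt):
        # delegate_base_url would come from account_threepid_delegates.email
        resp = requests.post(
            delegate_base_url
            + "/_matrix/identity/api/v1/validate/email/requestToken",
            json={
                "email": email,
                "client_secret": client_secret,
                "send_attempt": send_attempt,
            },
        )
        resp.raise_for_status()
        return resp.json()["sid"]  # session id used in later validation steps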
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 1ee0ba8c30..3e4edc6b0b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -938,7 +938,7 @@ uploads_path: "DATADIR/uploads" # https://matrix.org/docs/spec/identity_service/latest # account_threepid_delegates: - #email: https://example.com # Delegate email sending to matrix.org + #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process # Users who register on this homeserver will automatically be joined diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 9548560edb..d4654e99b3 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -294,7 +294,7 @@ class RegistrationConfig(Config): # https://matrix.org/docs/spec/identity_service/latest # account_threepid_delegates: - #email: https://example.com # Delegate email sending to matrix.org + #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process # Users who register on this homeserver will automatically be joined -- cgit 1.4.1 From a136137b2efae0fa5b3344cb94759fe0f5913221 Mon Sep 17 00:00:00 2001 From: Pete Date: Thu, 19 Sep 2019 09:52:59 +0100 Subject: Update INSTALL.md with void-linux (#5873) --- INSTALL.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index 5728882460..38c113b269 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -349,6 +349,13 @@ sudo pip uninstall py-bcrypt sudo pip install py-bcrypt ``` +### Void Linux + +Synapse can be found in the void repositories as 'synapse': + + xbps-install -Su + xbps-install -S synapse + ### FreeBSD Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from: -- cgit 1.4.1 From 62e3ff92fd3228b5c34f6cee691e22f9b1f85c9e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 19 Sep 2019 10:53:14 +0100 Subject: Remove POST method from password reset submit_token endpoint (#6056) Removes the POST method from `/password_reset//submit_token/` as it's only used by phone number verification which Synapse does not support yet. --- changelog.d/6056.bugfix | 1 + synapse/rest/client/v2_alpha/account.py | 17 ----------------- 2 files changed, 1 insertion(+), 17 deletions(-) create mode 100644 changelog.d/6056.bugfix diff --git a/changelog.d/6056.bugfix b/changelog.d/6056.bugfix new file mode 100644 index 0000000000..4d9573a58d --- /dev/null +++ b/changelog.d/6056.bugfix @@ -0,0 +1 @@ +Remove POST method from password reset submit_token endpoint until we implement submit_url functionality. 
From 84a2743e2eaf5402cef8b68327efaf54daf64150 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Thu, 19 Sep 2019 10:55:43 +0100
Subject: Add changelog

---
 changelog.d/6064.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6064.misc

diff --git a/changelog.d/6064.misc b/changelog.d/6064.misc
new file mode 100644
index 0000000000..28dc89111b
--- /dev/null
+++ b/changelog.d/6064.misc
@@ -0,0 +1 @@
+Clean up the sample config for SAML authentication.
-- 
cgit 1.4.1


From bcd91328692555d85df346c4571085c9b41b8f6a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 15:06:27 +0100
Subject: Undo the deletion of some tables (#6047)

This is a partial revert of #5893.

The problem is that if we drop these tables in the same release as removing
the code that writes to them, it prevents users from being able to roll back
to a previous release.

So let's leave the tables in place for now, and remember to drop them in a
subsequent release.

(Note that these tables haven't been *read* for *years*, so any missing rows
resulting from a temporary upgrade to vNext won't cause a problem.)
---
 changelog.d/5893.misc                              |  2 +-
 changelog.d/6047.misc                              |  2 ++
 .../schema/delta/56/drop_unused_event_tables.sql   | 20 --------------------
 3 files changed, 3 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/6047.misc
 delete mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql

diff --git a/changelog.d/5893.misc b/changelog.d/5893.misc
index 07ee4888dc..5ef171cb3e 100644
--- a/changelog.d/5893.misc
+++ b/changelog.d/5893.misc
@@ -1 +1 @@
-Drop some unused tables.
+Stop populating some unused tables.

diff --git a/changelog.d/6047.misc b/changelog.d/6047.misc
new file mode 100644
index 0000000000..a4cdb8abb3
--- /dev/null
+++ b/changelog.d/6047.misc
@@ -0,0 +1,2 @@
+Stop populating some unused tables.
+

diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
deleted file mode 100644
index 9f09922c67..0000000000
--- a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright 2019 The Matrix.org Foundation C.I.C.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- these tables are never used.
-DROP TABLE IF EXISTS room_names;
-DROP TABLE IF EXISTS topics;
-DROP TABLE IF EXISTS history_visibility;
-DROP TABLE IF EXISTS guest_access;
-- 
cgit 1.4.1
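The commit message spells out a useful rule for schema migrations: stop writing to a table in release N, and only drop it in release N+1, so that a rollback from N to N-1 still finds the schema it expects. A minimal sketch of the second, deferred phase (plain sqlite3 and an invented delta function, not Synapse's migration framework):

    import sqlite3

    def apply_drop_delta(conn):
        # Safe only once no supported release reads or writes the table.
        conn.execute("DROP TABLE IF EXISTS room_names")
        conn.commit()

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE room_names (room_id TEXT, name TEXT)")
    apply_drop_delta(conn)  # release N+1: the deferred cleanup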
From 35ce3bda7aaa6281f02123225ca63d913fa12df1 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 15:06:48 +0100
Subject: Add some notes on rolling back to v1.3.1. (#6049)

---
 UPGRADE.rst          | 25 +++++++++++++++++++++++++
 changelog.d/6049.doc |  1 +
 2 files changed, 26 insertions(+)
 create mode 100644 changelog.d/6049.doc

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 5aaf804902..53f3af4ed1 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -99,6 +99,31 @@ Synapse will expect these files to exist inside the configured template director
 default templates, see `synapse/res/templates
 <https://github.com/matrix-org/synapse/tree/master/synapse/res/templates>`_.
 
+Rolling back to v1.3.1
+----------------------
+
+If you encounter problems with v1.4.0, it should be possible to roll back to
+v1.3.1, subject to the following:
+
+* The 'room statistics' engine was heavily reworked in this release (see
+  `#5971 <https://github.com/matrix-org/synapse/pull/5971>`_), including
+  significant changes to the database schema, which are not easily
+  reverted. This will cause the room statistics engine to stop updating when
+  you downgrade.
+
+  The room statistics are essentially unused in v1.3.1 (in future versions of
+  Synapse, they will be used to populate the room directory), so there should
+  be no loss of functionality. However, the statistics engine will write errors
+  to the logs, which can be avoided by setting the following in `homeserver.yaml`:
+
+  .. code:: yaml
+
+    stats:
+      enabled: false
+
+  Don't forget to re-enable it when you upgrade again, in preparation for its
+  use in the room directory!
+
 Upgrading to v1.2.0
 ===================
 
diff --git a/changelog.d/6049.doc b/changelog.d/6049.doc
new file mode 100644
index 0000000000..e0307bf5c1
--- /dev/null
+++ b/changelog.d/6049.doc
@@ -0,0 +1 @@
+Add some notes on rolling back to v1.3.1.
-- 
cgit 1.4.1


From 466866a1d9dd1fcf82348a36c0532cb0c6614767 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 15:08:14 +0100
Subject: Update the issue template for new way of getting server version (#6051)

cf #4878
---
 .github/ISSUE_TEMPLATE/BUG_REPORT.md | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/BUG_REPORT.md b/.github/ISSUE_TEMPLATE/BUG_REPORT.md
index 5cf844bfb1..9dd05bcba3 100644
--- a/.github/ISSUE_TEMPLATE/BUG_REPORT.md
+++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.md
@@ -7,7 +7,7 @@ about: Create a report to help us improve
 
-- **Homeserver**:
+- **Homeserver**:
 
 If not matrix.org:
 
-- **Version**:
+- **Version**:
 
-- **Install method**:
+- **Install method**:
 
-- **Platform**:
+- **Platform**:
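The "new way of getting server version" in the subject line is the admin API introduced around #4878. Assuming the unauthenticated `/_synapse/admin/v1/server_version` endpoint of this era (adjust host and port for the homeserver in question), a reporter can fill in the version fields with something like:

    import json
    from urllib.request import urlopen

    # Ask the homeserver for its version, e.g. when filling in a bug report.
    with urlopen("http://localhost:8008/_synapse/admin/v1/server_version") as resp:
        info = json.load(resp)

    print(info.get("server_version"), info.get("python_version"))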