diff options
author | Andrew Morgan <andrew@amorgan.xyz> | 2019-02-26 14:23:40 +0000 |
---|---|---|
committer | Andrew Morgan <andrew@amorgan.xyz> | 2019-02-26 14:23:40 +0000 |
commit | 802884d4ee06ca8e42f46f64e6da7c188d43dc69 (patch) | |
tree | 6767e6e142d75e5500092a829d488583fcedef51 /synapse | |
parent | Add changelog (diff) | |
parent | Merge pull request #4745 from matrix-org/revert-4736-anoa/public_rooms_federate (diff) | |
download | synapse-802884d4ee06ca8e42f46f64e6da7c188d43dc69.tar.xz |
Merge branch 'develop' of github.com:matrix-org/synapse into anoa/public_rooms_federate_develop
Diffstat (limited to 'synapse')
193 files changed, 9279 insertions, 4548 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py index 89ea9a9775..2004375f98 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018-9 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.33.8" +__version__ = "0.99.1.1" diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index 70cecde486..6e93f5a0c6 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -35,6 +35,7 @@ def request_registration( server_location, shared_secret, admin=False, + user_type=None, requests=_requests, _print=print, exit=sys.exit, @@ -45,7 +46,7 @@ def request_registration( # Get the nonce r = requests.get(url, verify=False) - if r.status_code is not 200: + if r.status_code != 200: _print("ERROR! Received %d %s" % (r.status_code, r.reason)) if 400 <= r.status_code < 500: try: @@ -65,6 +66,9 @@ def request_registration( mac.update(password.encode('utf8')) mac.update(b"\x00") mac.update(b"admin" if admin else b"notadmin") + if user_type: + mac.update(b"\x00") + mac.update(user_type.encode('utf8')) mac = mac.hexdigest() @@ -74,12 +78,13 @@ def request_registration( "password": password, "mac": mac, "admin": admin, + "user_type": user_type, } _print("Sending registration request...") r = requests.post(url, json=data, verify=False) - if r.status_code is not 200: + if r.status_code != 200: _print("ERROR! 
Received %d %s" % (r.status_code, r.reason)) if 400 <= r.status_code < 500: try: @@ -91,7 +96,7 @@ def request_registration( _print("Success!") -def register_new_user(user, password, server_location, shared_secret, admin): +def register_new_user(user, password, server_location, shared_secret, admin, user_type): if not user: try: default_user = getpass.getuser() @@ -129,7 +134,8 @@ def register_new_user(user, password, server_location, shared_secret, admin): else: admin = False - request_registration(user, password, server_location, shared_secret, bool(admin)) + request_registration(user, password, server_location, shared_secret, + bool(admin), user_type) def main(): @@ -154,6 +160,12 @@ def main(): default=None, help="New password for user. Will prompt if omitted.", ) + parser.add_argument( + "-t", + "--user_type", + default=None, + help="User type as specified in synapse.api.constants.UserTypes", + ) admin_group = parser.add_mutually_exclusive_group() admin_group.add_argument( "-a", @@ -208,7 +220,8 @@ def main(): if args.admin or args.no_admin: admin = args.admin - register_new_user(args.user, args.password, args.server_url, secret, admin) + register_new_user(args.user, args.password, args.server_url, secret, + admin, args.user_type) if __name__ == "__main__": diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 34382e4e3c..5992d30623 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -65,7 +65,7 @@ class Auth(object): register_cache("cache", "token_cache", self.token_cache) @defer.inlineCallbacks - def check_from_context(self, event, context, do_sig_check=True): + def check_from_context(self, room_version, event, context, do_sig_check=True): prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.compute_auth_events( event, prev_state_ids, for_verification=True, @@ -74,12 +74,16 @@ class Auth(object): auth_events = { (e.type, e.state_key): e for e in itervalues(auth_events) } - self.check(event, 
auth_events=auth_events, do_sig_check=do_sig_check) + self.check( + room_version, event, + auth_events=auth_events, do_sig_check=do_sig_check, + ) - def check(self, event, auth_events, do_sig_check=True): + def check(self, room_version, event, auth_events, do_sig_check=True): """ Checks if this event is correctly authed. Args: + room_version (str): version of the room event: the event being checked. auth_events (dict: event-key -> event): the existing room state. @@ -88,7 +92,9 @@ class Auth(object): True if the auth checks pass. """ with Measure(self.clock, "auth.check"): - event_auth.check(event, auth_events, do_sig_check=do_sig_check) + event_auth.check( + room_version, event, auth_events, do_sig_check=do_sig_check + ) @defer.inlineCallbacks def check_joined_room(self, room_id, user_id, current_state=None): @@ -188,17 +194,33 @@ class Auth(object): """ # Can optionally look elsewhere in the request (e.g. headers) try: + ip_addr = self.hs.get_ip_from_request(request) + user_agent = request.requestHeaders.getRawHeaders( + b"User-Agent", + default=[b""] + )[0].decode('ascii', 'surrogateescape') + + access_token = self.get_access_token_from_request( + request, self.TOKEN_NOT_FOUND_HTTP_STATUS + ) + user_id, app_service = yield self._get_appservice_user_id(request) if user_id: request.authenticated_entity = user_id + + if ip_addr and self.hs.config.track_appservice_user_ips: + yield self.store.insert_client_ip( + user_id=user_id, + access_token=access_token, + ip=ip_addr, + user_agent=user_agent, + device_id="dummy-device", # stubbed + ) + defer.returnValue( synapse.types.create_requester(user_id, app_service=app_service) ) - access_token = self.get_access_token_from_request( - request, self.TOKEN_NOT_FOUND_HTTP_STATUS - ) - user_info = yield self.get_user_by_access_token(access_token, rights) user = user_info["user"] token_id = user_info["token_id"] @@ -208,11 +230,6 @@ class Auth(object): # stubbed out. 
device_id = user_info.get("device_id") - ip_addr = self.hs.get_ip_from_request(request) - user_agent = request.requestHeaders.getRawHeaders( - b"User-Agent", - default=[b""] - )[0].decode('ascii', 'surrogateescape') if user and access_token and ip_addr: yield self.store.insert_client_ip( user_id=user.to_string(), @@ -289,20 +306,28 @@ class Auth(object): Raises: AuthError if no user by that token exists or the token is invalid. """ - try: - user_id, guest = self._parse_and_validate_macaroon(token, rights) - except _InvalidMacaroonException: - # doesn't look like a macaroon: treat it as an opaque token which - # must be in the database. - # TODO: it would be nice to get rid of this, but apparently some - # people use access tokens which aren't macaroons + + if rights == "access": + # first look in the database r = yield self._look_up_user_by_access_token(token) - defer.returnValue(r) + if r: + defer.returnValue(r) + # otherwise it needs to be a valid macaroon try: + user_id, guest = self._parse_and_validate_macaroon(token, rights) user = UserID.from_string(user_id) - if guest: + if rights == "access": + if not guest: + # non-guest access tokens must be in the database + logger.warning("Unrecognised access token - not in store.") + raise AuthError( + self.TOKEN_NOT_FOUND_HTTP_STATUS, + "Unrecognised access token.", + errcode=Codes.UNKNOWN_TOKEN, + ) + # Guest access tokens are not stored in the database (there can # only be one access token per guest, anyway). # @@ -343,31 +368,15 @@ class Auth(object): "device_id": None, } else: - # This codepath exists for several reasons: - # * so that we can actually return a token ID, which is used - # in some parts of the schema (where we probably ought to - # use device IDs instead) - # * the only way we currently have to invalidate an - # access_token is by removing it from the database, so we - # have to check here that it is still in the db - # * some attributes (notably device_id) aren't stored in the - # macaroon. 
They probably should be. - # TODO: build the dictionary from the macaroon once the - # above are fixed - ret = yield self._look_up_user_by_access_token(token) - if ret["user"] != user: - logger.error( - "Macaroon user (%s) != DB user (%s)", - user, - ret["user"] - ) - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "User mismatch in macaroon", - errcode=Codes.UNKNOWN_TOKEN - ) + raise RuntimeError("Unknown rights setting %s", rights) defer.returnValue(ret) - except (pymacaroons.exceptions.MacaroonException, TypeError, ValueError): + except ( + _InvalidMacaroonException, + pymacaroons.exceptions.MacaroonException, + TypeError, + ValueError, + ) as e: + logger.warning("Invalid macaroon in auth: %s %s", type(e), e) raise AuthError( self.TOKEN_NOT_FOUND_HTTP_STATUS, "Invalid macaroon passed.", errcode=Codes.UNKNOWN_TOKEN @@ -497,11 +506,8 @@ class Auth(object): def _look_up_user_by_access_token(self, token): ret = yield self.store.get_user_by_access_token(token) if not ret: - logger.warn("Unrecognised access token - not in store.") - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, "Unrecognised access token.", - errcode=Codes.UNKNOWN_TOKEN - ) + defer.returnValue(None) + # we use ret.get() below because *lots* of unit tests stub out # get_user_by_access_token in a way where it only returns a couple of # the fields. 
@@ -545,17 +551,6 @@ class Auth(object): return self.store.is_server_admin(user) @defer.inlineCallbacks - def add_auth_events(self, builder, context): - prev_state_ids = yield context.get_prev_state_ids(self.store) - auth_ids = yield self.compute_auth_events(builder, prev_state_ids) - - auth_events_entries = yield self.store.add_event_hashes( - auth_ids - ) - - builder.auth_events = auth_events_entries - - @defer.inlineCallbacks def compute_auth_events(self, event, current_state_ids, for_verification=False): if event.type == EventTypes.Create: defer.returnValue([]) @@ -571,7 +566,7 @@ class Auth(object): key = (EventTypes.JoinRules, "", ) join_rule_event_id = current_state_ids.get(key) - key = (EventTypes.Member, event.user_id, ) + key = (EventTypes.Member, event.sender, ) member_event_id = current_state_ids.get(key) key = (EventTypes.Create, "", ) @@ -621,7 +616,7 @@ class Auth(object): defer.returnValue(auth_ids) - def check_redaction(self, event, auth_events): + def check_redaction(self, room_version, event, auth_events): """Check whether the event sender is allowed to redact the target event. Returns: @@ -634,7 +629,7 @@ class Auth(object): AuthError if the event sender is definitely not allowed to redact the target event. """ - return event_auth.check_redaction(event, auth_events) + return event_auth.check_redaction(room_version, event, auth_events) @defer.inlineCallbacks def check_can_change_room_list(self, room_id, user): @@ -791,9 +786,10 @@ class Auth(object): threepid should never be set at the same time. 
""" - # Never fail an auth check for the server notices users + # Never fail an auth check for the server notices users or support user # This can be a problem where event creation is prohibited due to blocking - if user_id == self.hs.config.server_notices_mxid: + is_support = yield self.store.is_support_user(user_id) + if user_id == self.hs.config.server_notices_mxid or is_support: return if self.hs.config.hs_disabled: @@ -818,7 +814,9 @@ class Auth(object): elif threepid: # If the user does not exist yet, but is signing up with a # reserved threepid then pass auth check - if is_threepid_reserved(self.hs.config, threepid): + if is_threepid_reserved( + self.hs.config.mau_limits_reserved_threepids, threepid + ): return # Else if there is no room in the MAU bucket, bail current_mau = yield self.store.get_monthly_active_count() diff --git a/synapse/api/constants.py b/synapse/api/constants.py index c2630c4c64..f47c33a074 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -51,6 +51,7 @@ class LoginType(object): EMAIL_IDENTITY = u"m.login.email.identity" MSISDN = u"m.login.msisdn" RECAPTCHA = u"m.login.recaptcha" + TERMS = u"m.login.terms" DUMMY = u"m.login.dummy" # Only for C/S API v1 @@ -61,15 +62,18 @@ class LoginType(object): class EventTypes(object): Member = "m.room.member" Create = "m.room.create" + Tombstone = "m.room.tombstone" JoinRules = "m.room.join_rules" PowerLevels = "m.room.power_levels" Aliases = "m.room.aliases" Redaction = "m.room.redaction" ThirdPartyInvite = "m.room.third_party_invite" + Encryption = "m.room.encryption" RoomHistoryVisibility = "m.room.history_visibility" CanonicalAlias = "m.room.canonical_alias" RoomAvatar = "m.room.avatar" + RoomEncryption = "m.room.encryption" GuestAccess = "m.room.guest_access" # These are used for validation @@ -100,7 +104,14 @@ class ThirdPartyEntityKind(object): class RoomVersions(object): V1 = "1" - VDH_TEST = "vdh-test-version" + V2 = "2" + V3 = "3" + STATE_V2_TEST = "state-v2-test" + + 
+class RoomDisposition(object): + STABLE = "stable" + UNSTABLE = "unstable" # the version we will give rooms which are created on this server @@ -108,7 +119,36 @@ DEFAULT_ROOM_VERSION = RoomVersions.V1 # vdh-test-version is a placeholder to get room versioning support working and tested # until we have a working v2. -KNOWN_ROOM_VERSIONS = {RoomVersions.V1, RoomVersions.VDH_TEST} +KNOWN_ROOM_VERSIONS = { + RoomVersions.V1, + RoomVersions.V2, + RoomVersions.V3, + RoomVersions.STATE_V2_TEST, + RoomVersions.V3, +} + + +class EventFormatVersions(object): + """This is an internal enum for tracking the version of the event format, + independently from the room version. + """ + V1 = 1 + V2 = 2 + + +KNOWN_EVENT_FORMAT_VERSIONS = { + EventFormatVersions.V1, + EventFormatVersions.V2, +} + ServerNoticeMsgType = "m.server_notice" ServerNoticeLimitReached = "m.server_notice.usage_limit_reached" + + +class UserTypes(object): + """Allows for user type specific behaviour. With the benefit of hindsight + 'admin' and 'guest' users should also be UserTypes. Normal users are type None + """ + SUPPORT = "support" + ALL_USER_TYPES = (SUPPORT,) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 48b903374d..0b464834ce 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -348,6 +348,24 @@ class IncompatibleRoomVersionError(SynapseError): ) +class RequestSendFailed(RuntimeError): + """Sending a HTTP request over federation failed due to not being able to + talk to the remote server for some reason. + + This exception is used to differentiate "expected" errors that arise due to + networking (e.g. DNS failures, connection timeouts etc), versus unexpected + errors (like programming errors). 
+ """ + def __init__(self, inner_exception, can_retry): + super(RequestSendFailed, self).__init__( + "Failed to send request: %s: %s" % ( + type(inner_exception).__name__, inner_exception, + ) + ) + self.inner_exception = inner_exception + self.can_retry = can_retry + + def cs_error(msg, code=Codes.UNKNOWN, **kwargs): """ Utility method for constructing an error response for client-server interactions. diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 677c0bdd4c..3906475403 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from six import text_type + import jsonschema from canonicaljson import json from jsonschema import FormatChecker @@ -353,7 +355,7 @@ class Filter(object): sender = event.user_id room_id = None ev_type = "m.presence" - is_url = False + contains_url = False else: sender = event.get("sender", None) if not sender: @@ -368,13 +370,16 @@ class Filter(object): room_id = event.get("room_id", None) ev_type = event.get("type", None) - is_url = "url" in event.get("content", {}) + + content = event.get("content", {}) + # check if there is a string url field in the content for filtering purposes + contains_url = isinstance(content.get("url"), text_type) return self.check_fields( room_id, sender, ev_type, - is_url, + contains_url, ) def check_fields(self, room_id, sender, event_type, contains_url): @@ -439,6 +444,20 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) + def with_room_ids(self, room_ids): + """Returns a new filter with the given room IDs appended. + + Args: + room_ids (iterable[unicode]): The room_ids to add + + Returns: + filter: A new filter including the given rooms and the old + filter's rooms. 
+ """ + newFilter = Filter(self.filter_json) + newFilter.rooms += room_ids + return newFilter + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 6d9f1ca0ef..8102176653 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -24,11 +24,12 @@ from synapse.config import ConfigError CLIENT_PREFIX = "/_matrix/client/api/v1" CLIENT_V2_ALPHA_PREFIX = "/_matrix/client/v2_alpha" -FEDERATION_PREFIX = "/_matrix/federation/v1" +FEDERATION_PREFIX = "/_matrix/federation" +FEDERATION_V1_PREFIX = FEDERATION_PREFIX + "/v1" +FEDERATION_V2_PREFIX = FEDERATION_PREFIX + "/v2" STATIC_PREFIX = "/_matrix/static" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" -SERVER_KEY_PREFIX = "/_matrix/key/v1" SERVER_KEY_V2_PREFIX = "/_matrix/key/v2" MEDIA_PREFIX = "/_matrix/media/r0" LEGACY_MEDIA_PREFIX = "/_matrix/media/v1" diff --git a/synapse/app/__init__.py b/synapse/app/__init__.py index c3afcc573b..f56f5fcc13 100644 --- a/synapse/app/__init__.py +++ b/synapse/app/__init__.py @@ -12,22 +12,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging import sys from synapse import python_dependencies # noqa: E402 sys.dont_write_bytecode = True +logger = logging.getLogger(__name__) try: python_dependencies.check_requirements() -except python_dependencies.MissingRequirementError as e: - message = "\n".join([ - "Missing Requirement: %s" % (str(e),), - "To install run:", - " pip install --upgrade --force \"%s\"" % (e.dependency,), - "", - ]) - sys.stderr.writelines(message) +except python_dependencies.DependencyException as e: + sys.stderr.writelines(e.message) sys.exit(1) + + +def check_bind_error(e, address, bind_addresses): + """ + This method checks an exception occurred while binding on 0.0.0.0. 
+ If :: is specified in the bind addresses a warning is shown. + The exception is still raised otherwise. + + Binding on both 0.0.0.0 and :: causes an exception on Linux and macOS + because :: binds on both IPv4 and IPv6 (as per RFC 3493). + When binding on 0.0.0.0 after :: this can safely be ignored. + + Args: + e (Exception): Exception that was caught. + address (str): Address on which binding was attempted. + bind_addresses (list): Addresses on which the service listens. + """ + if address == '0.0.0.0' and '::' in bind_addresses: + logger.warn('Failed to listen on 0.0.0.0, continuing because listening on [::]') + else: + raise e diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 18584226e9..32e8b8a3f5 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -15,18 +15,38 @@ import gc import logging +import signal import sys +import traceback import psutil from daemonize import Daemonize from twisted.internet import error, reactor +from twisted.protocols.tls import TLSMemoryBIOFactory +import synapse +from synapse.app import check_bind_error +from synapse.crypto import context_factory from synapse.util import PreserveLoggingContext from synapse.util.rlimit import change_resource_limit +from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) +_sighup_callbacks = [] + + +def register_sighup(func): + """ + Register a function to be called when a SIGHUP occurs. + + Args: + func (function): Function to be called when sent a SIGHUP signal. + Will be called with a single argument, the homeserver. 
+ """ + _sighup_callbacks.append(func) + def start_worker_reactor(appname, config): """ Run the reactor in the main process @@ -135,62 +155,154 @@ def listen_metrics(bind_addresses, port): from prometheus_client import start_http_server for host in bind_addresses: - reactor.callInThread(start_http_server, int(port), - addr=host, registry=RegistryProxy) - logger.info("Metrics now reporting on %s:%d", host, port) + logger.info("Starting metrics listener on %s:%d", host, port) + start_http_server(port, addr=host, registry=RegistryProxy) def listen_tcp(bind_addresses, port, factory, reactor=reactor, backlog=50): """ Create a TCP socket for a port and several addresses + + Returns: + list[twisted.internet.tcp.Port]: listening for TCP connections """ + r = [] for address in bind_addresses: try: - reactor.listenTCP( - port, - factory, - backlog, - address + r.append( + reactor.listenTCP( + port, + factory, + backlog, + address + ) ) except error.CannotListenError as e: check_bind_error(e, address, bind_addresses) + return r + def listen_ssl( bind_addresses, port, factory, context_factory, reactor=reactor, backlog=50 ): """ - Create an SSL socket for a port and several addresses + Create an TLS-over-TCP socket for a port and several addresses + + Returns: + list of twisted.internet.tcp.Port listening for TLS connections """ + r = [] for address in bind_addresses: try: - reactor.listenSSL( - port, - factory, - context_factory, - backlog, - address + r.append( + reactor.listenSSL( + port, + factory, + context_factory, + backlog, + address + ) ) except error.CannotListenError as e: check_bind_error(e, address, bind_addresses) + return r -def check_bind_error(e, address, bind_addresses): + +def refresh_certificate(hs): + """ + Refresh the TLS certificates that Synapse is using by re-reading them from + disk and updating the TLS context factories to use them. """ - This method checks an exception occurred while binding on 0.0.0.0. 
- If :: is specified in the bind addresses a warning is shown. - The exception is still raised otherwise. - Binding on both 0.0.0.0 and :: causes an exception on Linux and macOS - because :: binds on both IPv4 and IPv6 (as per RFC 3493). - When binding on 0.0.0.0 after :: this can safely be ignored. + if not hs.config.has_tls_listener(): + # attempt to reload the certs for the good of the tls_fingerprints + hs.config.read_certificate_from_disk(require_cert_and_key=False) + return + + hs.config.read_certificate_from_disk(require_cert_and_key=True) + hs.tls_server_context_factory = context_factory.ServerContextFactory(hs.config) + + if hs._listening_services: + logger.info("Updating context factories...") + for i in hs._listening_services: + # When you listenSSL, it doesn't make an SSL port but a TCP one with + # a TLS wrapping factory around the factory you actually want to get + # requests. This factory attribute is public but missing from + # Twisted's documentation. + if isinstance(i.factory, TLSMemoryBIOFactory): + addr = i.getHost() + logger.info( + "Replacing TLS context factory on [%s]:%i", addr.host, addr.port, + ) + # We want to replace TLS factories with a new one, with the new + # TLS configuration. We do this by reaching in and pulling out + # the wrappedFactory, and then re-wrapping it. + i.factory = TLSMemoryBIOFactory( + hs.tls_server_context_factory, + False, + i.factory.wrappedFactory + ) + logger.info("Context factories updated.") + + +def start(hs, listeners=None): + """ + Start a Synapse server or worker. Args: - e (Exception): Exception that was caught. - address (str): Address on which binding was attempted. - bind_addresses (list): Addresses on which the service listens. 
+ hs (synapse.server.HomeServer) + listeners (list[dict]): Listener configuration ('listeners' in homeserver.yaml) """ - if address == '0.0.0.0' and '::' in bind_addresses: - logger.warn('Failed to listen on 0.0.0.0, continuing because listening on [::]') - else: - raise e + try: + # Set up the SIGHUP machinery. + if hasattr(signal, "SIGHUP"): + def handle_sighup(*args, **kwargs): + for i in _sighup_callbacks: + i(hs) + + signal.signal(signal.SIGHUP, handle_sighup) + + register_sighup(refresh_certificate) + + # Load the certificate from disk. + refresh_certificate(hs) + + # It is now safe to start your Synapse. + hs.start_listening(listeners) + hs.get_datastore().start_profiling() + + setup_sentry(hs) + except Exception: + traceback.print_exc(file=sys.stderr) + reactor = hs.get_reactor() + if reactor.running: + reactor.stop() + sys.exit(1) + + +def setup_sentry(hs): + """Enable sentry integration, if enabled in configuration + + Args: + hs (synapse.server.HomeServer) + """ + + if not hs.config.sentry_enabled: + return + + import sentry_sdk + sentry_sdk.init( + dsn=hs.config.sentry_dsn, + release=get_version_string(synapse), + ) + + # We set some default tags that give some context to this instance + with sentry_sdk.configure_scope() as scope: + scope.set_tag("matrix_server_name", hs.config.server_name) + + app = hs.config.worker_app if hs.config.worker_app else "synapse.app.homeserver" + name = hs.config.worker_name if hs.config.worker_name else "master" + scope.set_tag("worker_app", app) + scope.set_tag("worker_name", name) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 8559e141af..33107f56d1 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -168,12 +168,7 @@ def start(config_options): ) ps.setup() - ps.start_listening(config.worker_listeners) - - def start(): - ps.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + reactor.callWhenRunning(_base.start, ps, config.worker_listeners) 
_base.start_worker_reactor("synapse-appservice", config) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 76aed8c60a..043b48f8f3 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -25,7 +25,6 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy @@ -41,6 +40,7 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.client.v1.login import LoginRestServlet from synapse.rest.client.v1.room import ( JoinedRoomMemberListRestServlet, PublicRoomListRestServlet, @@ -48,6 +48,7 @@ from synapse.rest.client.v1.room import ( RoomMemberListRestServlet, RoomStateRestServlet, ) +from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -93,6 +94,8 @@ class ClientReaderServer(HomeServer): JoinedRoomMemberListRestServlet(self).register(resource) RoomStateRestServlet(self).register(resource) RoomEventContextServlet(self).register(resource) + RegisterRestServlet(self).register(resource) + LoginRestServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, @@ -164,26 +167,16 @@ def start(config_options): database_engine = create_engine(config.database_config) - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = 
context_factory.ClientTLSOptionsFactory(config) - ss = ClientReaderServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) ss.setup() - ss.start_listening(config.worker_listeners) - - def start(): - ss.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) _base.start_worker_reactor("synapse-client-reader", config) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index e4a68715aa..b8e5196152 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -25,7 +25,6 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy @@ -185,26 +184,16 @@ def start(config_options): database_engine = create_engine(config.database_config) - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - ss = EventCreatorServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) ss.setup() - ss.start_listening(config.worker_listeners) - - def start(): - ss.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) 
_base.start_worker_reactor("synapse-event-creator", config) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 228a297fb8..b116c17669 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -26,7 +26,6 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.federation.transport.server import TransportLayerServer from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy @@ -41,6 +40,7 @@ from synapse.replication.slave.storage.profile import SlavedProfileStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.pushers import SlavedPusherStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler @@ -63,6 +63,7 @@ class FederationReaderSlavedStore( SlavedReceiptsStore, SlavedEventStore, SlavedKeyStore, + SlavedRegistrationStore, RoomStore, DirectoryStore, SlavedTransactionStore, @@ -87,6 +88,16 @@ class FederationReaderServer(HomeServer): resources.update({ FEDERATION_PREFIX: TransportLayerServer(self), }) + if name == "openid" and "federation" not in res["names"]: + # Only load the openid resource separately if federation resource + # is not specified since federation resource includes openid + # resource. 
+ resources.update({ + FEDERATION_PREFIX: TransportLayerServer( + self, + servlet_groups=["openid"], + ), + }) root_resource = create_resource_tree(resources, NoResource()) @@ -99,7 +110,8 @@ class FederationReaderServer(HomeServer): listener_config, root_resource, self.version_string, - ) + ), + reactor=self.get_reactor() ) logger.info("Synapse federation reader now listening on port %d", port) @@ -151,26 +163,16 @@ def start(config_options): database_engine = create_engine(config.database_config) - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - ss = FederationReaderServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) ss.setup() - ss.start_listening(config.worker_listeners) - - def start(): - ss.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) _base.start_worker_reactor("synapse-federation-reader", config) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index e9a99d76e1..a461442fdc 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -25,7 +25,6 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.federation import send_queue from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy @@ -183,26 +182,17 @@ def start(config_options): # Force the pushers to start since they will be disabled in the main config config.send_federation = True - tls_server_context_factory = 
context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - - ps = FederationSenderServer( + ss = FederationSenderServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) - ps.setup() - ps.start_listening(config.worker_listeners) - - def start(): - ps.get_datastore().start_profiling() + ss.setup() + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) - reactor.callWhenRunning(start) _base.start_worker_reactor("synapse-federation-sender", config) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index f5c61dec5b..8479fee738 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -21,12 +21,11 @@ from twisted.web.resource import NoResource import synapse from synapse import events -from synapse.api.errors import SynapseError +from synapse.api.errors import HttpResponseException, SynapseError from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.http.server import JsonResource from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseSite @@ -67,10 +66,15 @@ class PresenceStatusStubServlet(ClientV1RestServlet): headers = { "Authorization": auth_headers, } - result = yield self.http_client.get_json( - self.main_uri + request.uri.decode('ascii'), - headers=headers, - ) + + try: + result = yield self.http_client.get_json( + self.main_uri + request.uri.decode('ascii'), + headers=headers, + ) + except HttpResponseException as e: + raise e.to_synapse_error() + defer.returnValue((200, 
result)) @defer.inlineCallbacks @@ -241,26 +245,16 @@ def start(config_options): database_engine = create_engine(config.database_config) - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - ss = FrontendProxyServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) ss.setup() - ss.start_listening(config.worker_listeners) - - def start(): - ss.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) _base.start_worker_reactor("synapse-frontend-proxy", config) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 593e1e75db..05a97979ec 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +14,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +from __future__ import print_function + import gc import logging import os @@ -25,6 +29,7 @@ from prometheus_client import Gauge from twisted.application import service from twisted.internet import defer, reactor +from twisted.python.failure import Failure from twisted.web.resource import EncodingResourceWrapper, NoResource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File @@ -37,7 +42,6 @@ from synapse.api.urls import ( FEDERATION_PREFIX, LEGACY_MEDIA_PREFIX, MEDIA_PREFIX, - SERVER_KEY_PREFIX, SERVER_KEY_V2_PREFIX, STATIC_PREFIX, WEB_CLIENT_PREFIX, @@ -46,7 +50,6 @@ from synapse.app import _base from synapse.app._base import listen_ssl, listen_tcp, quit_with_error from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig -from synapse.crypto import context_factory from synapse.federation.transport.server import TransportLayerServer from synapse.http.additional_resource import AdditionalResource from synapse.http.server import RootRedirect @@ -55,13 +58,13 @@ from synapse.metrics import RegistryProxy from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.module_api import ModuleApi -from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, check_requirements +from synapse.python_dependencies import check_requirements from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource -from synapse.rest.key.v1.server_key_resource import LocalKey from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.media.v0.content_repository import ContentRepoResource +from synapse.rest.well_known import WellKnownResource from synapse.server import HomeServer from synapse.storage import DataStore, are_all_users_on_domain from 
synapse.storage.engines import IncorrectDatabaseSetup, create_engine @@ -81,36 +84,6 @@ def gz_wrap(r): return EncodingResourceWrapper(r, [GzipEncoderFactory()]) -def build_resource_for_web_client(hs): - webclient_path = hs.get_config().web_client_location - if not webclient_path: - try: - import syweb - except ImportError: - quit_with_error( - "Could not find a webclient.\n\n" - "Please either install the matrix-angular-sdk or configure\n" - "the location of the source to serve via the configuration\n" - "option `web_client_location`\n\n" - "To install the `matrix-angular-sdk` via pip, run:\n\n" - " pip install '%(dep)s'\n" - "\n" - "You can also disable hosting of the webclient via the\n" - "configuration option `web_client`\n" - % {"dep": CONDITIONAL_REQUIREMENTS["web_client"].keys()[0]} - ) - syweb_path = os.path.dirname(syweb.__file__) - webclient_path = os.path.join(syweb_path, "webclient") - # GZip is disabled here due to - # https://twistedmatrix.com/trac/ticket/7678 - # (It can stay enabled for the API resources: they call - # write() with the whole body and then finish() straight - # after and so do not trigger the bug. - # GzipFile was removed in commit 184ba09 - # return GzipFile(webclient_path) # TODO configurable? - return File(webclient_path) # TODO configurable? 
- - class SynapseHomeServer(HomeServer): DATASTORE_CLASS = DataStore @@ -120,12 +93,13 @@ class SynapseHomeServer(HomeServer): tls = listener_config.get("tls", False) site_tag = listener_config.get("tag", port) - if tls and config.no_tls: - return - resources = {} for res in listener_config["resources"]: for name in res["names"]: + if name == "openid" and "federation" in res["names"]: + # Skip loading openid resource if federation is defined + # since federation resource will include openid + continue resources.update(self._configure_named_resource( name, res.get("compress", False), )) @@ -139,15 +113,18 @@ class SynapseHomeServer(HomeServer): handler = handler_cls(config, module_api) resources[path] = AdditionalResource(self, handler.handle_request) + # try to find something useful to redirect '/' to if WEB_CLIENT_PREFIX in resources: root_resource = RootRedirect(WEB_CLIENT_PREFIX) + elif STATIC_PREFIX in resources: + root_resource = RootRedirect(STATIC_PREFIX) else: root_resource = NoResource() root_resource = create_resource_tree(resources, root_resource) if tls: - listen_ssl( + ports = listen_ssl( bind_addresses, port, SynapseSite( @@ -158,10 +135,12 @@ class SynapseHomeServer(HomeServer): self.version_string, ), self.tls_server_context_factory, + reactor=self.get_reactor(), ) + logger.info("Synapse now listening on TCP port %d (TLS)", port) else: - listen_tcp( + ports = listen_tcp( bind_addresses, port, SynapseSite( @@ -170,9 +149,12 @@ class SynapseHomeServer(HomeServer): listener_config, root_resource, self.version_string, - ) + ), + reactor=self.get_reactor(), ) - logger.info("Synapse now listening on port %d", port) + logger.info("Synapse now listening on TCP port %d", port) + + return ports def _configure_named_resource(self, name, compress=False): """Build a resource map for a named resource @@ -197,8 +179,13 @@ class SynapseHomeServer(HomeServer): "/_matrix/client/unstable": client_resource, "/_matrix/client/v2_alpha": client_resource, 
"/_matrix/client/versions": client_resource, + "/.well-known/matrix/client": WellKnownResource(self), }) + if self.get_config().saml2_enabled: + from synapse.rest.saml2 import SAML2Resource + resources["/_matrix/saml2"] = SAML2Resource(self) + if name == "consent": from synapse.rest.consent.consent_resource import ConsentResource consent_resource = ConsentResource(self) @@ -213,6 +200,11 @@ class SynapseHomeServer(HomeServer): FEDERATION_PREFIX: TransportLayerServer(self), }) + if name == "openid": + resources.update({ + FEDERATION_PREFIX: TransportLayerServer(self, servlet_groups=["openid"]), + }) + if name in ["static", "client"]: resources.update({ STATIC_PREFIX: File( @@ -236,13 +228,19 @@ class SynapseHomeServer(HomeServer): ) if name in ["keys", "federation"]: - resources.update({ - SERVER_KEY_PREFIX: LocalKey(self), - SERVER_KEY_V2_PREFIX: KeyApiV2Resource(self), - }) + resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self) if name == "webclient": - resources[WEB_CLIENT_PREFIX] = build_resource_for_web_client(self) + webclient_path = self.get_config().web_client_location + + if webclient_path is None: + logger.warning( + "Not enabling webclient resource, as web_client_location is unset." 
+ ) + else: + # GZip is disabled here due to + # https://twistedmatrix.com/trac/ticket/7678 + resources[WEB_CLIENT_PREFIX] = File(webclient_path) if name == "metrics" and self.get_config().enable_metrics: resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) @@ -252,12 +250,14 @@ class SynapseHomeServer(HomeServer): return resources - def start_listening(self): + def start_listening(self, listeners): config = self.get_config() - for listener in config.listeners: + for listener in listeners: if listener["type"] == "http": - self._listener_http(config, listener) + self._listening_services.extend( + self._listener_http(config, listener) + ) elif listener["type"] == "manhole": listen_tcp( listener["bind_addresses"], @@ -269,14 +269,14 @@ class SynapseHomeServer(HomeServer): ) ) elif listener["type"] == "replication": - bind_addresses = listener["bind_addresses"] - for address in bind_addresses: - factory = ReplicationStreamProtocolFactory(self) - server_listener = reactor.listenTCP( - listener["port"], factory, interface=address - ) + services = listen_tcp( + listener["bind_addresses"], + listener["port"], + ReplicationStreamProtocolFactory(self), + ) + for s in services: reactor.addSystemEventTrigger( - "before", "shutdown", server_listener.stopListening, + "before", "shutdown", s.stopListening, ) elif listener["type"] == "metrics": if not self.get_config().enable_metrics: @@ -337,24 +337,19 @@ def setup(config_options): # generating config files and shouldn't try to continue. 
sys.exit(0) - synapse.config.logger.setup_logging(config, use_worker_options=False) - - # check any extra requirements we have now we have a config - check_requirements(config) + synapse.config.logger.setup_logging( + config, + use_worker_options=False + ) events.USE_FROZEN_DICTS = config.use_frozen_dicts - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - database_engine = create_engine(config.database_config) config.database_config["args"]["cp_openfun"] = database_engine.on_new_connection hs = SynapseHomeServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, @@ -381,12 +376,79 @@ def setup(config_options): logger.info("Database prepared in %s.", config.database_config['name']) hs.setup() - hs.start_listening() + @defer.inlineCallbacks + def do_acme(): + """ + Reprovision an ACME certificate, if it's required. + + Returns: + Deferred[bool]: Whether the cert has been updated. + """ + acme = hs.get_acme_handler() + + # Check how long the certificate is active for. + cert_days_remaining = hs.config.is_disk_cert_valid( + allow_self_signed=False + ) + + # We want to reprovision if cert_days_remaining is None (meaning no + # certificate exists), or the days remaining number it returns + # is less than our re-registration threshold. + provision = False + + if ( + cert_days_remaining is None or + cert_days_remaining < hs.config.acme_reprovision_threshold + ): + provision = True + + if provision: + yield acme.provision_certificate() + + defer.returnValue(provision) + + @defer.inlineCallbacks + def reprovision_acme(): + """ + Provision a certificate from ACME, if required, and reload the TLS + certificate if it's renewed. 
+ """ + reprovisioned = yield do_acme() + if reprovisioned: + _base.refresh_certificate(hs) + + @defer.inlineCallbacks def start(): - hs.get_pusherpool().start() - hs.get_datastore().start_profiling() - hs.get_datastore().start_doing_background_updates() + try: + # Run the ACME provisioning code, if it's enabled. + if hs.config.acme_enabled: + acme = hs.get_acme_handler() + # Start up the webservices which we will respond to ACME + # challenges with, and then provision. + yield acme.start_listening() + yield do_acme() + + # Check if it needs to be reprovisioned every day. + hs.get_clock().looping_call( + reprovision_acme, + 24 * 60 * 60 * 1000 + ) + + _base.start(hs, config.listeners) + + hs.get_pusherpool().start() + hs.get_datastore().start_doing_background_updates() + except Exception: + # Print the exception and bail out. + print("Error during startup:", file=sys.stderr) + + # this gives better tracebacks than traceback.print_exc() + Failure().printTraceback(file=sys.stderr) + + if reactor.running: + reactor.stop() + sys.exit(1) reactor.callWhenRunning(start) @@ -394,7 +456,8 @@ def setup(config_options): class SynapseService(service.Service): - """A twisted Service class that will start synapse. Used to run synapse + """ + A twisted Service class that will start synapse. Used to run synapse via twistd and a .tac. 
""" def __init__(self, config): @@ -540,7 +603,7 @@ def run(hs): current_mau_count = 0 reserved_count = 0 store = hs.get_datastore() - if hs.config.limit_usage_by_mau: + if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: current_mau_count = yield store.get_monthly_active_count() reserved_count = yield store.get_registered_reserved_users_count() current_mau_gauge.set(float(current_mau_count)) @@ -554,7 +617,7 @@ def run(hs): ) start_generate_monthly_active_users() - if hs.config.limit_usage_by_mau: + if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: clock.looping_call(start_generate_monthly_active_users, 5 * 60 * 1000) # End of monthly active user settings diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index acc0487adc..d4cc4e9443 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -26,7 +26,6 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy from synapse.metrics.resource import METRICS_PREFIX, MetricsResource @@ -151,26 +150,16 @@ def start(config_options): database_engine = create_engine(config.database_config) - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - ss = MediaRepositoryServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) ss.setup() - ss.start_listening(config.worker_listeners) - - def start(): - ss.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + 
reactor.callWhenRunning(_base.start, ss, config.worker_listeners) _base.start_worker_reactor("synapse-media-repository", config) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 83b0863f00..cbf0d67f51 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -224,11 +224,10 @@ def start(config_options): ) ps.setup() - ps.start_listening(config.worker_listeners) def start(): + _base.start(ps, config.worker_listeners) ps.get_pusherpool().start() - ps.get_datastore().start_profiling() reactor.callWhenRunning(start) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 3926c7f263..9163b56d86 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -226,7 +226,15 @@ class SynchrotronPresence(object): class SynchrotronTyping(object): def __init__(self, hs): self._latest_room_serial = 0 + self._reset() + + def _reset(self): + """ + Reset the typing handler's data caches. + """ + # map room IDs to serial numbers self._room_serials = {} + # map room IDs to sets of users currently typing self._room_typing = {} def stream_positions(self): @@ -236,6 +244,12 @@ class SynchrotronTyping(object): return {"typing": self._latest_room_serial} def process_replication_rows(self, token, rows): + if self._latest_room_serial > token: + # The master has gone backwards. To prevent inconsistent data, just + # clear everything. + self._reset() + + # Set the latest serial token to whatever the server gave us. 
self._latest_room_serial = token for row in rows: @@ -431,12 +445,7 @@ def start(config_options): ) ss.setup() - ss.start_listening(config.worker_listeners) - - def start(): - ss.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) _base.start_worker_reactor("synapse-synchrotron", config) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index 0a5f62b509..d1ab9512cd 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -26,7 +26,6 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.crypto import context_factory from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy @@ -211,26 +210,16 @@ def start(config_options): # Force the pushers to start since they will be disabled in the main config config.update_user_directory = True - tls_server_context_factory = context_factory.ServerContextFactory(config) - tls_client_options_factory = context_factory.ClientTLSOptionsFactory(config) - - ps = UserDirectoryServer( + ss = UserDirectoryServer( config.server_name, db_config=config.database_config, - tls_server_context_factory=tls_server_context_factory, - tls_client_options_factory=tls_client_options_factory, config=config, version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) - ps.setup() - ps.start_listening(config.worker_listeners) - - def start(): - ps.get_datastore().start_profiling() - - reactor.callWhenRunning(start) + ss.setup() + reactor.callWhenRunning(_base.start, ss, config.worker_listeners) _base.start_worker_reactor("synapse-user-dir", config) diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index 2430814796..685f15c061 100644 --- a/synapse/appservice/scheduler.py +++ 
b/synapse/appservice/scheduler.py @@ -53,8 +53,8 @@ import logging from twisted.internet import defer from synapse.appservice import ApplicationServiceState +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.logcontext import run_in_background -from synapse.util.metrics import Measure logger = logging.getLogger(__name__) @@ -104,27 +104,34 @@ class _ServiceQueuer(object): self.clock = clock def enqueue(self, service, event): - # if this service isn't being sent something self.queued_events.setdefault(service.id, []).append(event) - run_in_background(self._send_request, service) - @defer.inlineCallbacks - def _send_request(self, service): + # start a sender for this appservice if we don't already have one + if service.id in self.requests_in_flight: return + run_as_background_process( + "as-sender-%s" % (service.id, ), + self._send_request, service, + ) + + @defer.inlineCallbacks + def _send_request(self, service): + # sanity-check: we shouldn't get here if this service already has a sender + # running. 
+ assert(service.id not in self.requests_in_flight) + self.requests_in_flight.add(service.id) try: while True: events = self.queued_events.pop(service.id, []) if not events: return - - with Measure(self.clock, "servicequeuer.send"): - try: - yield self.txn_ctrl.send(service, events) - except Exception: - logger.exception("AS request failed") + try: + yield self.txn_ctrl.send(service, events) + except Exception: + logger.exception("AS request failed") finally: self.requests_in_flight.discard(service.id) @@ -223,7 +230,12 @@ class _Recoverer(object): self.backoff_counter = 1 def recover(self): - self.clock.call_later((2 ** self.backoff_counter), self.retry) + def _retry(): + run_as_background_process( + "as-recoverer-%s" % (self.service.id,), + self.retry, + ) + self.clock.call_later((2 ** self.backoff_counter), _retry) def _backoff(self): # cap the backoff to be around 8.5min => (2^9) = 512 secs diff --git a/synapse/config/__main__.py b/synapse/config/__main__.py index 79fe9c3dac..fca35b008c 100644 --- a/synapse/config/__main__.py +++ b/synapse/config/__main__.py @@ -16,7 +16,7 @@ from synapse.config._base import ConfigError if __name__ == "__main__": import sys - from homeserver import HomeServerConfig + from synapse.config.homeserver import HomeServerConfig action = sys.argv[1] diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 14dae65ea0..5aec43b702 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -135,10 +135,6 @@ class Config(object): return file_stream.read() @staticmethod - def default_path(name): - return os.path.abspath(os.path.join(os.path.curdir, name)) - - @staticmethod def read_config_file(file_path): with open(file_path) as file_stream: return yaml.load(file_stream) @@ -151,8 +147,39 @@ class Config(object): return results def generate_config( - self, config_dir_path, server_name, is_generating_file, report_stats=None + self, + config_dir_path, + data_dir_path, + server_name, + generate_secrets=False, + 
report_stats=None, ): + """Build a default configuration file + + This is used both when the user explicitly asks us to generate a config file + (eg with --generate_config), and before loading the config at runtime (to give + a base which the config files override) + + Args: + config_dir_path (str): The path where the config files are kept. Used to + create filenames for things like the log config and the signing key. + + data_dir_path (str): The path where the data files are kept. Used to create + filenames for things like the database and media store. + + server_name (str): The server name. Used to initialise the server_name + config param, but also used in the names of some of the config files. + + generate_secrets (bool): True if we should generate new secrets for things + like the macaroon_secret_key. If False, these parameters will be left + unset. + + report_stats (bool|None): Initial setting for the report_stats setting. + If None, report_stats will be left unset. + + Returns: + str: the yaml config file + """ default_config = "# vim:ft=yaml\n" default_config += "\n\n".join( @@ -160,15 +187,14 @@ class Config(object): for conf in self.invoke_all( "default_config", config_dir_path=config_dir_path, + data_dir_path=data_dir_path, server_name=server_name, - is_generating_file=is_generating_file, + generate_secrets=generate_secrets, report_stats=report_stats, ) ) - config = yaml.load(default_config) - - return default_config, config + return default_config @classmethod def load_config(cls, description, argv): @@ -231,7 +257,7 @@ class Config(object): "--keys-directory", metavar="DIRECTORY", help="Used with 'generate-*' options to specify where files such as" - " certs and signing keys should be stored in, unless explicitly" + " signing keys should be stored, unless explicitly" " specified in the config.", ) config_parser.add_argument( @@ -274,27 +300,24 @@ class Config(object): if not cls.path_exists(config_dir_path): os.makedirs(config_dir_path) with 
open(config_path, "w") as config_file: - config_str, config = obj.generate_config( + config_str = obj.generate_config( config_dir_path=config_dir_path, + data_dir_path=os.getcwd(), server_name=server_name, report_stats=(config_args.report_stats == "yes"), - is_generating_file=True, + generate_secrets=True, ) + config = yaml.load(config_str) obj.invoke_all("generate_files", config) config_file.write(config_str) print( ( "A config file has been generated in %r for server name" - " %r with corresponding SSL keys and self-signed" - " certificates. Please review this file and customise it" + " %r. Please review this file and customise it" " to your needs." ) % (config_path, server_name) ) - print( - "If this server name is incorrect, you will need to" - " regenerate the SSL certificates" - ) return else: print( @@ -339,7 +362,7 @@ class Config(object): if not keys_directory: keys_directory = os.path.dirname(config_files[-1]) - config_dir_path = os.path.abspath(keys_directory) + self.config_dir_path = os.path.abspath(keys_directory) specified_config = {} for config_file in config_files: @@ -350,11 +373,13 @@ class Config(object): raise ConfigError(MISSING_SERVER_NAME) server_name = specified_config["server_name"] - _, config = self.generate_config( - config_dir_path=config_dir_path, + config_string = self.generate_config( + config_dir_path=self.config_dir_path, + data_dir_path=os.getcwd(), server_name=server_name, - is_generating_file=False, + generate_secrets=False, ) + config = yaml.load(config_string) config.pop("log_config") config.update(specified_config) diff --git a/synapse/config/api.py b/synapse/config/api.py index 403d96ba76..e8a753f002 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -24,6 +24,7 @@ class ApiConfig(Config): EventTypes.JoinRules, EventTypes.CanonicalAlias, EventTypes.RoomAvatar, + EventTypes.RoomEncryption, EventTypes.Name, ]) @@ -32,9 +33,11 @@ class ApiConfig(Config): ## API Configuration ## # A list of event types that will 
be included in the room_invite_state + # room_invite_state_types: - "{JoinRules}" - "{CanonicalAlias}" - "{RoomAvatar}" + - "{RoomEncryption}" - "{Name}" """.format(**vars(EventTypes)) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 3b161d708a..c260d59464 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -33,11 +33,18 @@ class AppServiceConfig(Config): def read_config(self, config): self.app_service_config_files = config.get("app_service_config_files", []) self.notify_appservices = config.get("notify_appservices", True) + self.track_appservice_user_ips = config.get("track_appservice_user_ips", False) def default_config(cls, **kwargs): return """\ # A list of application service config file to use + # app_service_config_files: [] + + # Whether or not to track application service IP addresses. Implicitly + # enables MAU tracking for application service users. + # + track_appservice_user_ips: False """ diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index 7ba0c2de6a..d25196be08 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -30,19 +30,22 @@ class CaptchaConfig(Config): # See docs/CAPTCHA_SETUP for full details of configuring this. # This Home Server's ReCAPTCHA public key. + # recaptcha_public_key: "YOUR_PUBLIC_KEY" # This Home Server's ReCAPTCHA private key. + # recaptcha_private_key: "YOUR_PRIVATE_KEY" # Enables ReCaptcha checks when registering, preventing signup # unless a captcha is answered. Requires a valid ReCaptcha # public/private key. + # enable_registration_captcha: False # A secret key used to bypass the captcha test entirely. #captcha_bypass_secret: "YOUR_SECRET_HERE" # The API endpoint to use for verifying m.login.recaptcha responses. 
- recaptcha_siteverify_api: "https://www.google.com/recaptcha/api/siteverify" + recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" """ diff --git a/synapse/config/cas.py b/synapse/config/cas.py index 8109e5f95e..609c0815c8 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py @@ -38,6 +38,7 @@ class CasConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ # Enable CAS for registration and login. + # #cas_config: # enabled: true # server_url: "https://cas-server.com" diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index e22c731aad..abeb0180d3 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from os import path + +from synapse.config import ConfigError + from ._base import Config DEFAULT_CONFIG = """\ @@ -42,18 +46,28 @@ DEFAULT_CONFIG = """\ # until the user consents to the privacy policy. The value of the setting is # used as the text of the error. # -# user_consent: -# template_dir: res/templates/privacy -# version: 1.0 -# server_notice_content: -# msgtype: m.text -# body: >- -# To continue using this homeserver you must review and agree to the -# terms and conditions at %(consent_uri)s -# send_server_notice_to_guests: True -# block_events_error: >- -# To continue using this homeserver you must review and agree to the -# terms and conditions at %(consent_uri)s +# 'require_at_registration', if enabled, will add a step to the registration +# process, similar to how captcha works. Users will be required to accept the +# policy before their account is created. +# +# 'policy_name' is the display name of the policy users will see when registering +# for an account. Has no effect unless `require_at_registration` is enabled. +# Defaults to "Privacy Policy". 
+# +#user_consent: +# template_dir: res/templates/privacy +# version: 1.0 +# server_notice_content: +# msgtype: m.text +# body: >- +# To continue using this homeserver you must review and agree to the +# terms and conditions at %(consent_uri)s +# send_server_notice_to_guests: True +# block_events_error: >- +# To continue using this homeserver you must review and agree to the +# terms and conditions at %(consent_uri)s +# require_at_registration: False +# policy_name: Privacy Policy # """ @@ -67,13 +81,23 @@ class ConsentConfig(Config): self.user_consent_server_notice_content = None self.user_consent_server_notice_to_guests = False self.block_events_without_consent_error = None + self.user_consent_at_registration = False + self.user_consent_policy_name = "Privacy Policy" def read_config(self, config): consent_config = config.get("user_consent") if consent_config is None: return self.user_consent_version = str(consent_config["version"]) - self.user_consent_template_dir = consent_config["template_dir"] + self.user_consent_template_dir = self.abspath( + consent_config["template_dir"] + ) + if not path.isdir(self.user_consent_template_dir): + raise ConfigError( + "Could not find template directory '%s'" % ( + self.user_consent_template_dir, + ), + ) self.user_consent_server_notice_content = consent_config.get( "server_notice_content", ) @@ -83,6 +107,12 @@ class ConsentConfig(Config): self.user_consent_server_notice_to_guests = bool(consent_config.get( "send_server_notice_to_guests", False, )) + self.user_consent_at_registration = bool(consent_config.get( + "require_at_registration", False, + )) + self.user_consent_policy_name = consent_config.get( + "policy_name", "Privacy Policy", + ) def default_config(self, **kwargs): return DEFAULT_CONFIG diff --git a/synapse/config/database.py b/synapse/config/database.py index e915d9d09b..c8890147a6 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os from ._base import Config @@ -45,8 +46,8 @@ class DatabaseConfig(Config): self.set_databasepath(config.get("database_path")) - def default_config(self, **kwargs): - database_path = self.abspath("homeserver.db") + def default_config(self, data_dir_path, **kwargs): + database_path = os.path.join(data_dir_path, "homeserver.db") return """\ # Database configuration database: diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 997fa2881f..46933a904c 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -24,9 +24,11 @@ class GroupsConfig(Config): def default_config(self, **kwargs): return """\ # Whether to allow non server admins to create groups on this server + # enable_group_creation: false # If enabled, non server admins can only create groups with local parts # starting with this prefix - # group_creation_prefix: "unofficial/" + # + #group_creation_prefix: "unofficial/" """ diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 10dd40159f..727fdc54d8 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -32,7 +32,7 @@ from .ratelimiting import RatelimitConfig from .registration import RegistrationConfig from .repository import ContentRepositoryConfig from .room_directory import RoomDirectoryConfig -from .saml2 import SAML2Config +from .saml2_config import SAML2Config from .server import ServerConfig from .server_notices_config import ServerNoticesConfig from .spam_checker import SpamCheckerConfig @@ -42,7 +42,7 @@ from .voip import VoipConfig from .workers import WorkerConfig -class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, +class HomeServerConfig(ServerConfig, TlsConfig, DatabaseConfig, LoggingConfig, RatelimitConfig, ContentRepositoryConfig, CaptchaConfig, VoipConfig, RegistrationConfig, MetricsConfig, 
ApiConfig, AppServiceConfig, KeyConfig, SAML2Config, CasConfig, @@ -53,10 +53,3 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, ServerNoticesConfig, RoomDirectoryConfig, ): pass - - -if __name__ == '__main__': - import sys - sys.stdout.write( - HomeServerConfig().generate_config(sys.argv[1], sys.argv[2], True)[0] - ) diff --git a/synapse/config/jwt_config.py b/synapse/config/jwt_config.py index 51e7f7e003..ecb4124096 100644 --- a/synapse/config/jwt_config.py +++ b/synapse/config/jwt_config.py @@ -46,8 +46,8 @@ class JWTConfig(Config): return """\ # The JWT needs to contain a globally unique "sub" (subject) claim. # - # jwt_config: - # enabled: true - # secret: "a secret" - # algorithm: "HS256" + #jwt_config: + # enabled: true + # secret: "a secret" + # algorithm: "HS256" """ diff --git a/synapse/config/key.py b/synapse/config/key.py index 279c47bb48..35f05fa974 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -40,7 +40,7 @@ class KeyConfig(Config): def read_config(self, config): self.signing_key = self.read_signing_key(config["signing_key_path"]) self.old_signing_keys = self.read_old_signing_keys( - config["old_signing_keys"] + config.get("old_signing_keys", {}) ) self.key_refresh_interval = self.parse_duration( config["key_refresh_interval"] @@ -56,9 +56,9 @@ class KeyConfig(Config): if not self.macaroon_secret_key: # Unfortunately, there are people out there that don't have this # set. Lets just be "nice" and derive one from their secret key. 
- logger.warn("Config is missing missing macaroon_secret_key") - seed = self.signing_key[0].seed - self.macaroon_secret_key = hashlib.sha256(seed) + logger.warn("Config is missing macaroon_secret_key") + seed = bytes(self.signing_key[0]) + self.macaroon_secret_key = hashlib.sha256(seed).digest() self.expire_access_token = config.get("expire_access_token", False) @@ -66,35 +66,46 @@ class KeyConfig(Config): # falsification of values self.form_secret = config.get("form_secret", None) - def default_config(self, config_dir_path, server_name, is_generating_file=False, + def default_config(self, config_dir_path, server_name, generate_secrets=False, **kwargs): base_key_name = os.path.join(config_dir_path, server_name) - if is_generating_file: - macaroon_secret_key = random_string_with_symbols(50) - form_secret = '"%s"' % random_string_with_symbols(50) + if generate_secrets: + macaroon_secret_key = 'macaroon_secret_key: "%s"' % ( + random_string_with_symbols(50), + ) + form_secret = 'form_secret: "%s"' % random_string_with_symbols(50) else: - macaroon_secret_key = None - form_secret = 'null' + macaroon_secret_key = "# macaroon_secret_key: <PRIVATE STRING>" + form_secret = "# form_secret: <PRIVATE STRING>" return """\ - macaroon_secret_key: "%(macaroon_secret_key)s" + # a secret which is used to sign access tokens. If none is specified, + # the registration_shared_secret is used, if one is given; otherwise, + # a secret key is derived from the signing key. + # + %(macaroon_secret_key)s # Used to enable access token expiration. + # expire_access_token: False # a secret which is used to calculate HMACs for form values, to stop - # falsification of values - form_secret: %(form_secret)s + # falsification of values. Must be specified for the User Consent + # forms to work. 
+ # + %(form_secret)s ## Signing Keys ## # Path to the signing key to sign messages with + # signing_key_path: "%(base_key_name)s.signing.key" # The keys that the server used to sign messages with but won't use # to sign new messages. E.g. it has lost its private key - old_signing_keys: {} + # + #old_signing_keys: # "ed25519:auto": # # Base64 encoded public key # key: "The public part of your old signing key." @@ -105,9 +116,11 @@ class KeyConfig(Config): # Used to set the valid_until_ts in /key/v2 APIs. # Determines how quickly servers will query to check which keys # are still valid. + # key_refresh_interval: "1d" # 1 Day. # The trusted servers to download signing keys from. + # perspectives: servers: "matrix.org": diff --git a/synapse/config/logger.py b/synapse/config/logger.py index e9a936118d..f6940b65fd 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -15,7 +15,6 @@ import logging import logging.config import os -import signal import sys from string import Template @@ -24,6 +23,7 @@ import yaml from twisted.logger import STDLibLogObserver, globalLogBeginner import synapse +from synapse.app import _base as appbase from synapse.util.logcontext import LoggingContextFilter from synapse.util.versionstring import get_version_string @@ -50,6 +50,7 @@ handlers: maxBytes: 104857600 backupCount: 10 filters: [context] + encoding: utf8 console: class: logging.StreamHandler formatter: precise @@ -79,11 +80,10 @@ class LoggingConfig(Config): self.log_file = self.abspath(config.get("log_file")) def default_config(self, config_dir_path, server_name, **kwargs): - log_config = self.abspath( - os.path.join(config_dir_path, server_name + ".log.config") - ) + log_config = os.path.join(config_dir_path, server_name + ".log.config") return """ # A yaml python logging config file + # log_config: "%(log_config)s" """ % locals() @@ -137,6 +137,9 @@ def setup_logging(config, use_worker_options=False): use_worker_options (bool): True to use 'worker_log_config' and 
'worker_log_file' options instead of 'log_config' and 'log_file'. + + register_sighup (func | None): Function to call to register a + sighup handler. """ log_config = (config.worker_log_config if use_worker_options else config.log_config) @@ -179,7 +182,7 @@ def setup_logging(config, use_worker_options=False): else: handler = logging.StreamHandler() - def sighup(signum, stack): + def sighup(*args): pass handler.setFormatter(formatter) @@ -192,20 +195,14 @@ def setup_logging(config, use_worker_options=False): with open(log_config, 'r') as f: logging.config.dictConfig(yaml.load(f)) - def sighup(signum, stack): + def sighup(*args): # it might be better to use a file watcher or something for this. load_log_config() logging.info("Reloaded log config from %s due to SIGHUP", log_config) load_log_config() - # TODO(paul): obviously this is a terrible mechanism for - # stealing SIGHUP, because it means no other part of synapse - # can use it instead. If we want to catch SIGHUP anywhere - # else as well, I'd suggest we find a nicer way to broadcast - # it around. - if getattr(signal, "SIGHUP"): - signal.signal(signal.SIGHUP, sighup) + appbase.register_sighup(sighup) # make sure that the first thing we log is a thing we can grep backwards # for @@ -246,3 +243,5 @@ def setup_logging(config, use_worker_options=False): [_log], redirectStandardIO=not config.no_redirect_stdio, ) + if not config.no_redirect_stdio: + print("Redirected stdout/stderr to logs") diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 61155c99d0..ed0498c634 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -13,7 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import Config +from ._base import Config, ConfigError + +MISSING_SENTRY = ( + """Missing sentry-sdk library. This is required to enable sentry + integration. 
+ """ +) class MetricsConfig(Config): @@ -23,11 +29,43 @@ class MetricsConfig(Config): self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") + self.sentry_enabled = "sentry" in config + if self.sentry_enabled: + try: + import sentry_sdk # noqa F401 + except ImportError: + raise ConfigError(MISSING_SENTRY) + + self.sentry_dsn = config["sentry"].get("dsn") + if not self.sentry_dsn: + raise ConfigError( + "sentry.dsn field is required when sentry integration is enabled", + ) + def default_config(self, report_stats=None, **kwargs): - suffix = "" if report_stats is None else "report_stats: %(report_stats)s\n" - return ("""\ + res = """\ ## Metrics ### # Enable collection and rendering of performance metrics + # enable_metrics: False - """ + suffix) % locals() + + # Enable sentry integration + # NOTE: While attempts are made to ensure that the logs don't contain + # any sensitive information, this cannot be guaranteed. By enabling + # this option the sentry server may therefore receive sensitive + # information, and it in turn may then diseminate sensitive information + # through insecure notification channels if so configured. + # + #sentry: + # dsn: "..." + + # Whether or not to report anonymized homeserver usage statistics. + """ + + if report_stats is None: + res += "# report_stats: true|false\n" + else: + res += "report_stats: %s\n" % ('true' if report_stats else 'false') + + return res diff --git a/synapse/config/password.py b/synapse/config/password.py index a4bd171399..2a52b9db54 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -28,6 +28,7 @@ class PasswordConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ # Enable password for login. + # password_config: enabled: true # Uncomment and change to a secret random string for extra security. 
diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index f4066abc28..f0a6be0679 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -52,18 +52,18 @@ class PasswordAuthProviderConfig(Config): def default_config(self, **kwargs): return """\ - # password_providers: - # - module: "ldap_auth_provider.LdapAuthProvider" - # config: - # enabled: true - # uri: "ldap://ldap.example.com:389" - # start_tls: true - # base: "ou=users,dc=example,dc=com" - # attributes: - # uid: "cn" - # mail: "email" - # name: "givenName" - # #bind_dn: - # #bind_password: - # #filter: "(objectClass=posixAccount)" + #password_providers: + # - module: "ldap_auth_provider.LdapAuthProvider" + # config: + # enabled: true + # uri: "ldap://ldap.example.com:389" + # start_tls: true + # base: "ou=users,dc=example,dc=com" + # attributes: + # uid: "cn" + # mail: "email" + # name: "givenName" + # #bind_dn: + # #bind_password: + # #filter: "(objectClass=posixAccount)" """ diff --git a/synapse/config/push.py b/synapse/config/push.py index b7e0d46afa..62c0060c9c 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -51,11 +51,11 @@ class PushConfig(Config): # notification request includes the content of the event (other details # like the sender are still included). For `event_id_only` push, it # has no effect. - + # # For modern android devices the notification content will still appear # because it is loaded by the app. iPhone, however will send a # notification saying only that a message arrived and who it came from. 
# #push: - # include_content: true + # include_content: true """ diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 83b22dc199..54b71e6841 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -32,27 +32,34 @@ class RatelimitConfig(Config): ## Ratelimiting ## # Number of messages a client can send per second + # rc_messages_per_second: 0.2 # Number of message a client can send before being throttled + # rc_message_burst_count: 10.0 # The federation window size in milliseconds + # federation_rc_window_size: 1000 # The number of federation requests from a single server in a window # before the server will delay processing the request. + # federation_rc_sleep_limit: 10 # The duration in milliseconds to delay processing events from # remote servers by if they go over the sleep limit. + # federation_rc_sleep_delay: 500 # The maximum number of concurrent federation requests allowed # from a single server + # federation_rc_reject_limit: 50 # The number of federation requests to concurrently process from a # single server + # federation_rc_concurrent: 3 """ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 7480ed5145..2881482f96 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -37,6 +37,7 @@ class RegistrationConfig(Config): self.bcrypt_rounds = config.get("bcrypt_rounds", 12) self.trusted_third_party_id_servers = config["trusted_third_party_id_servers"] + self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) self.invite_3pid_guest = ( @@ -49,8 +50,17 @@ class RegistrationConfig(Config): raise ConfigError('Invalid auto_join_rooms entry %s' % (room_alias,)) self.autocreate_auto_join_rooms = config.get("autocreate_auto_join_rooms", True) - def default_config(self, **kwargs): - registration_shared_secret = random_string_with_symbols(50) + 
self.disable_msisdn_registration = ( + config.get("disable_msisdn_registration", False) + ) + + def default_config(self, generate_secrets=False, **kwargs): + if generate_secrets: + registration_shared_secret = 'registration_shared_secret: "%s"' % ( + random_string_with_symbols(50), + ) + else: + registration_shared_secret = '# registration_shared_secret: <PRIVATE STRING>' return """\ ## Registration ## @@ -60,54 +70,75 @@ class RegistrationConfig(Config): # The user must provide all of the below types of 3PID when registering. # - # registrations_require_3pid: - # - email - # - msisdn + #registrations_require_3pid: + # - email + # - msisdn + + # Explicitly disable asking for MSISDNs from the registration + # flow (overrides registrations_require_3pid if MSISDNs are set as required) + # + #disable_msisdn_registration: True # Mandate that users are only allowed to associate certain formats of # 3PIDs with accounts on this server. # - # allowed_local_3pids: - # - medium: email - # pattern: ".*@matrix\\.org" - # - medium: email - # pattern: ".*@vector\\.im" - # - medium: msisdn - # pattern: "\\+44" + #allowed_local_3pids: + # - medium: email + # pattern: '.*@matrix\\.org' + # - medium: email + # pattern: '.*@vector\\.im' + # - medium: msisdn + # pattern: '\\+44' # If set, allows registration by anyone who also has the shared # secret, even if registration is otherwise disabled. - registration_shared_secret: "%(registration_shared_secret)s" + # + %(registration_shared_secret)s # Set the number of bcrypt rounds used to generate password hash. # Larger numbers increase the work factor needed to generate the hash. # The default number is 12 (which equates to 2^12 rounds). # N.B. that increasing this will exponentially increase the time required # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. 
+ # bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and # participate in rooms hosted on this server which have been made # accessible to anonymous users. + # allow_guest_access: False + # The identity server which we suggest that clients should use when users log + # in on this server. + # + # (By default, no suggestion is made, so it is left up to the client. + # This setting is ignored unless public_baseurl is also set.) + # + #default_identity_server: https://matrix.org + # The list of identity servers trusted to verify third party # identifiers by this server. + # + # Also defines the ID server which will be called when an account is + # deactivated (one will be picked arbitrarily). + # trusted_third_party_id_servers: - - matrix.org - - vector.im - - riot.im + - matrix.org + - vector.im # Users who register on this homeserver will automatically be joined # to these rooms + # #auto_join_rooms: - # - "#example:example.com" + # - "#example:example.com" # Where auto_join_rooms are specified, setting this flag ensures that the # the rooms exist by creating them when the first user on the # homeserver registers. # Setting to false means that if the rooms are not manually created, # users cannot be auto-joined since they do not exist. + # autocreate_auto_join_rooms: true """ % locals() diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 06c62ab62c..97db2a5b7a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import os from collections import namedtuple from synapse.util.module_loader import load_module @@ -175,34 +175,39 @@ class ContentRepositoryConfig(Config): "url_preview_url_blacklist", () ) - def default_config(self, **kwargs): - media_store = self.default_path("media_store") - uploads_path = self.default_path("uploads") + def default_config(self, data_dir_path, **kwargs): + media_store = os.path.join(data_dir_path, "media_store") + uploads_path = os.path.join(data_dir_path, "uploads") return r""" # Directory where uploaded images and attachments are stored. + # media_store_path: "%(media_store)s" # Media storage providers allow media to be stored in different # locations. - # media_storage_providers: - # - module: file_system - # # Whether to write new local files. - # store_local: false - # # Whether to write new remote media - # store_remote: false - # # Whether to block upload requests waiting for write to this - # # provider to complete - # store_synchronous: false - # config: - # directory: /mnt/some/other/directory + # + #media_storage_providers: + # - module: file_system + # # Whether to write new local files. + # store_local: false + # # Whether to write new remote media + # store_remote: false + # # Whether to block upload requests waiting for write to this + # # provider to complete + # store_synchronous: false + # config: + # directory: /mnt/some/other/directory # Directory where in-progress uploads are stored. + # uploads_path: "%(uploads_path)s" # The largest allowed upload size in bytes + # max_upload_size: "10M" # Maximum number of pixels that will be thumbnailed + # max_image_pixels: "32M" # Whether to generate new thumbnails on the fly to precisely match @@ -210,9 +215,11 @@ class ContentRepositoryConfig(Config): # a new resolution is requested by the client the server will # generate a new thumbnail. If false the server will pick a thumbnail # from a precalculated list. 
+ # dynamic_thumbnails: false - # List of thumbnail to precalculate when an image is uploaded. + # List of thumbnails to precalculate when an image is uploaded. + # thumbnail_sizes: - width: 32 height: 32 @@ -233,6 +240,7 @@ class ContentRepositoryConfig(Config): # Is the preview URL API enabled? If enabled, you *must* specify # an explicit url_preview_ip_range_blacklist of IPs that the spider is # denied from accessing. + # url_preview_enabled: False # List of IP address CIDR ranges that the URL preview spider is denied @@ -243,16 +251,16 @@ class ContentRepositoryConfig(Config): # synapse to issue arbitrary GET requests to your internal services, # causing serious security issues. # - # url_preview_ip_range_blacklist: - # - '127.0.0.0/8' - # - '10.0.0.0/8' - # - '172.16.0.0/12' - # - '192.168.0.0/16' - # - '100.64.0.0/10' - # - '169.254.0.0/16' - # - '::1/128' - # - 'fe80::/64' - # - 'fc00::/7' + #url_preview_ip_range_blacklist: + # - '127.0.0.0/8' + # - '10.0.0.0/8' + # - '172.16.0.0/12' + # - '192.168.0.0/16' + # - '100.64.0.0/10' + # - '169.254.0.0/16' + # - '::1/128' + # - 'fe80::/64' + # - 'fc00::/7' # # List of IP address CIDR ranges that the URL preview spider is allowed # to access even if they are specified in url_preview_ip_range_blacklist. @@ -260,8 +268,8 @@ class ContentRepositoryConfig(Config): # target IP ranges - e.g. for enabling URL previews for a specific private # website only visible in your network. # - # url_preview_ip_range_whitelist: - # - '192.168.1.1' + #url_preview_ip_range_whitelist: + # - '192.168.1.1' # Optional list of URL matches that the URL preview spider is # denied from accessing. You should use url_preview_ip_range_blacklist @@ -279,26 +287,25 @@ class ContentRepositoryConfig(Config): # specified component matches for a given list item succeed, the URL is # blacklisted. 
# - # url_preview_url_blacklist: - # # blacklist any URL with a username in its URI - # - username: '*' + #url_preview_url_blacklist: + # # blacklist any URL with a username in its URI + # - username: '*' # - # # blacklist all *.google.com URLs - # - netloc: 'google.com' - # - netloc: '*.google.com' + # # blacklist all *.google.com URLs + # - netloc: 'google.com' + # - netloc: '*.google.com' # - # # blacklist all plain HTTP URLs - # - scheme: 'http' + # # blacklist all plain HTTP URLs + # - scheme: 'http' # - # # blacklist http(s)://www.acme.com/foo - # - netloc: 'www.acme.com' - # path: '/foo' + # # blacklist http(s)://www.acme.com/foo + # - netloc: 'www.acme.com' + # path: '/foo' # - # # blacklist any URL with a literal IPv4 address - # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' + # # blacklist any URL with a literal IPv4 address + # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' # The largest allowed URL preview spidering size in bytes max_spider_size: "10M" - """ % locals() diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 9da13ab11b..9b897abe3c 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -20,12 +20,37 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): def read_config(self, config): - alias_creation_rules = config["alias_creation_rules"] + alias_creation_rules = config.get("alias_creation_rules") - self._alias_creation_rules = [ - _AliasRule(rule) - for rule in alias_creation_rules - ] + if alias_creation_rules is not None: + self._alias_creation_rules = [ + _RoomDirectoryRule("alias_creation_rules", rule) + for rule in alias_creation_rules + ] + else: + self._alias_creation_rules = [ + _RoomDirectoryRule( + "alias_creation_rules", { + "action": "allow", + } + ) + ] + + room_list_publication_rules = config.get("room_list_publication_rules") + + if room_list_publication_rules is not None: + self._room_list_publication_rules = [ + 
_RoomDirectoryRule("room_list_publication_rules", rule) + for rule in room_list_publication_rules + ] + else: + self._room_list_publication_rules = [ + _RoomDirectoryRule( + "room_list_publication_rules", { + "action": "allow", + } + ) + ] def default_config(self, config_dir_path, server_name, **kwargs): return """ @@ -33,60 +58,138 @@ class RoomDirectoryConfig(Config): # on this server. # # The format of this option is a list of rules that contain globs that - # match against user_id and the new alias (fully qualified with server - # name). The action in the first rule that matches is taken, which can - # currently either be "allow" or "deny". + # match against user_id, room_id and the new alias (fully qualified with + # server name). The action in the first rule that matches is taken, + # which can currently either be "allow" or "deny". + # + # Missing user_id/room_id/alias fields default to "*". + # + # If no rules match the request is denied. An empty list means no one + # can create aliases. + # + # Options for the rules include: + # + # user_id: Matches against the creator of the alias + # alias: Matches against the alias being created + # room_id: Matches against the room ID the alias is being pointed at + # action: Whether to "allow" or "deny" the request if the rule matches + # + # The default is: + # + #alias_creation_rules: + # - user_id: "*" + # alias: "*" + # room_id: "*" + # action: allow + + # The `room_list_publication_rules` option controls who can publish and + # which rooms can be published in the public room list. + # + # The format of this option is the same as that for + # `alias_creation_rules`. # - # If no rules match the request is denied. - alias_creation_rules: - - user_id: "*" - alias: "*" - action: allow + # If the room has one or more aliases associated with it, only one of + # the aliases needs to match the alias rule. If there are no aliases + # then only rules with `alias: *` match. + # + # If no rules match the request is denied. 
An empty list means no one + # can publish rooms. + # + # Options for the rules include: + # + # user_id: Matches agaisnt the creator of the alias + # room_id: Matches against the room ID being published + # alias: Matches against any current local or canonical aliases + # associated with the room + # action: Whether to "allow" or "deny" the request if the rule matches + # + # The default is: + # + #room_list_publication_rules: + # - user_id: "*" + # alias: "*" + # room_id: "*" + # action: allow """ - def is_alias_creation_allowed(self, user_id, alias): + def is_alias_creation_allowed(self, user_id, room_id, alias): """Checks if the given user is allowed to create the given alias Args: user_id (str) + room_id (str) alias (str) Returns: boolean: True if user is allowed to crate the alias """ for rule in self._alias_creation_rules: - if rule.matches(user_id, alias): + if rule.matches(user_id, room_id, [alias]): + return rule.action == "allow" + + return False + + def is_publishing_room_allowed(self, user_id, room_id, aliases): + """Checks if the given user is allowed to publish the room + + Args: + user_id (str) + room_id (str) + aliases (list[str]): any local aliases associated with the room + + Returns: + boolean: True if user can publish room + """ + for rule in self._room_list_publication_rules: + if rule.matches(user_id, room_id, aliases): return rule.action == "allow" return False -class _AliasRule(object): - def __init__(self, rule): +class _RoomDirectoryRule(object): + """Helper class to test whether a room directory action is allowed, like + creating an alias or publishing a room. 
+ """ + + def __init__(self, option_name, rule): + """ + Args: + option_name (str): Name of the config option this rule belongs to + rule (dict): The rule as specified in the config + """ + action = rule["action"] - user_id = rule["user_id"] - alias = rule["alias"] + user_id = rule.get("user_id", "*") + room_id = rule.get("room_id", "*") + alias = rule.get("alias", "*") if action in ("allow", "deny"): self.action = action else: raise ConfigError( - "alias_creation_rules rules can only have action of 'allow'" - " or 'deny'" + "%s rules can only have action of 'allow'" + " or 'deny'" % (option_name,) ) + self._alias_matches_all = alias == "*" + try: self._user_id_regex = glob_to_regex(user_id) self._alias_regex = glob_to_regex(alias) + self._room_id_regex = glob_to_regex(room_id) except Exception as e: raise ConfigError("Failed to parse glob into regex: %s", e) - def matches(self, user_id, alias): - """Tests if this rule matches the given user_id and alias. + def matches(self, user_id, room_id, aliases): + """Tests if this rule matches the given user_id, room_id and aliases. Args: user_id (str) - alias (str) + room_id (str) + aliases (list[str]): The associated aliases to the room. Will be a + single element for testing alias creation, and can be empty for + testing room publishing. Returns: boolean @@ -96,7 +199,22 @@ class _AliasRule(object): if not self._user_id_regex.match(user_id): return False - if not self._alias_regex.match(alias): + if not self._room_id_regex.match(room_id): return False - return True + # We only have alias checks left, so we can short circuit if the alias + # rule matches everything. + if self._alias_matches_all: + return True + + # If we are not given any aliases then this rule only matches if the + # alias glob matches all aliases, which we checked above. 
+ if not aliases: + return False + + # Otherwise, we just need one alias to match + for alias in aliases: + if self._alias_regex.match(alias): + return True + + return False diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py deleted file mode 100644 index 8d7f443021..0000000000 --- a/synapse/config/saml2.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015 Ericsson -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ._base import Config - - -class SAML2Config(Config): - """SAML2 Configuration - Synapse uses pysaml2 libraries for providing SAML2 support - - config_path: Path to the sp_conf.py configuration file - idp_redirect_url: Identity provider URL which will redirect - the user back to /login/saml2 with proper info. - - sp_conf.py file is something like: - https://github.com/rohe/pysaml2/blob/master/example/sp-repoze/sp_conf.py.example - - More information: https://pythonhosted.org/pysaml2/howto/config.html - """ - - def read_config(self, config): - saml2_config = config.get("saml2_config", None) - if saml2_config: - self.saml2_enabled = saml2_config.get("enabled", True) - self.saml2_config_path = saml2_config["config_path"] - self.saml2_idp_redirect_url = saml2_config["idp_redirect_url"] - else: - self.saml2_enabled = False - self.saml2_config_path = None - self.saml2_idp_redirect_url = None - - def default_config(self, config_dir_path, server_name, **kwargs): - return """ - # Enable SAML2 for registration and login. 
Uses pysaml2 - # config_path: Path to the sp_conf.py configuration file - # idp_redirect_url: Identity provider URL which will redirect - # the user back to /login/saml2 with proper info. - # See pysaml2 docs for format of config. - #saml2_config: - # enabled: true - # config_path: "%s/sp_conf.py" - # idp_redirect_url: "http://%s/idp" - """ % (config_dir_path, server_name) diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py new file mode 100644 index 0000000000..aff0a1f00c --- /dev/null +++ b/synapse/config/saml2_config.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ._base import Config, ConfigError + + +class SAML2Config(Config): + def read_config(self, config): + self.saml2_enabled = False + + saml2_config = config.get("saml2_config") + + if not saml2_config or not saml2_config.get("enabled", True): + return + + self.saml2_enabled = True + + import saml2.config + self.saml2_sp_config = saml2.config.SPConfig() + self.saml2_sp_config.load(self._default_saml_config_dict()) + self.saml2_sp_config.load(saml2_config.get("sp_config", {})) + + config_path = saml2_config.get("config_path", None) + if config_path is not None: + self.saml2_sp_config.load_file(config_path) + + def _default_saml_config_dict(self): + import saml2 + + public_baseurl = self.public_baseurl + if public_baseurl is None: + raise ConfigError( + "saml2_config requires a public_baseurl to be set" + ) + + metadata_url = public_baseurl + "_matrix/saml2/metadata.xml" + response_url = public_baseurl + "_matrix/saml2/authn_response" + return { + "entityid": metadata_url, + + "service": { + "sp": { + "endpoints": { + "assertion_consumer_service": [ + (response_url, saml2.BINDING_HTTP_POST), + ], + }, + "required_attributes": ["uid"], + "optional_attributes": ["mail", "surname", "givenname"], + }, + } + } + + def default_config(self, config_dir_path, server_name, **kwargs): + return """ + # Enable SAML2 for registration and login. Uses pysaml2. + # + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #saml2_config: + # sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. 
+ # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # The rest of sp_config is just used to generate our metadata xml, and you + # # may well not need it, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! + # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address: ["admin@example.com"] + # contact_type: technical + # + # # Instead of putting the config inline as above, you can specify a + # # separate pysaml2 configuration file: + # # + # config_path: "%(config_dir_path)s/sp_conf.py" + """ % {"config_dir_path": config_dir_path} diff --git a/synapse/config/server.py b/synapse/config/server.py index c1c7c0105e..4200f10da3 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd +# Copyright 2017-2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,13 +15,23 @@ # limitations under the License. import logging +import os.path from synapse.http.endpoint import parse_and_validate_server_name +from synapse.python_dependencies import DependencyException, check_requirements from ._base import Config, ConfigError logger = logging.Logger(__name__) +# by default, we attempt to listen on both '::' *and* '0.0.0.0' because some OSes +# (Windows, macOS, other BSD/Linux where net.ipv6.bindv6only is set) will only listen +# on IPv6 when '::' is set. +# +# We later check for errors when binding to 0.0.0.0 and ignore them if :: is also +# in the list. 
+DEFAULT_BIND_ADDRESSES = ['::', '0.0.0.0'] + class ServerConfig(Config): @@ -34,7 +44,6 @@ class ServerConfig(Config): raise ConfigError(str(e)) self.pid_file = self.abspath(config.get("pid_file")) - self.web_client = config["web_client"] self.web_client_location = config.get("web_client_location", None) self.soft_file_limit = config["soft_file_limit"] self.daemonize = config.get("daemonize") @@ -62,6 +71,11 @@ class ServerConfig(Config): # master, potentially causing inconsistency. self.enable_media_repo = config.get("enable_media_repo", True) + # whether to enable search. If disabled, new entries will not be inserted + # into the search tables and they will not be indexed. Users will receive + # errors when attempting to search for messages. + self.enable_search = config.get("enable_search", True) + self.filter_timeline_limit = config.get("filter_timeline_limit", -1) # Whether we should block invites sent to users on this server @@ -77,6 +91,7 @@ class ServerConfig(Config): self.max_mau_value = config.get( "max_mau_value", 0, ) + self.mau_stats_only = config.get("mau_stats_only", False) self.mau_limits_reserved_threepids = config.get( "mau_limit_reserved_threepids", [] @@ -111,27 +126,53 @@ class ServerConfig(Config): self.public_baseurl += '/' self.start_pushers = config.get("start_pushers", True) - self.listeners = config.get("listeners", []) + self.listeners = [] + for listener in config.get("listeners", []): + if not isinstance(listener.get("port", None), int): + raise ConfigError( + "Listener configuration is lacking a valid 'port' option" + ) + + if listener.setdefault("tls", False): + # no_tls is not really supported any more, but let's grandfather it in + # here. 
+ if config.get("no_tls", False): + logger.info( + "Ignoring TLS-enabled listener on port %i due to no_tls" + ) + continue - for listener in self.listeners: bind_address = listener.pop("bind_address", None) bind_addresses = listener.setdefault("bind_addresses", []) + # if bind_address was specified, add it to the list of addresses if bind_address: bind_addresses.append(bind_address) - elif not bind_addresses: - bind_addresses.append('') + + # if we still have an empty list of addresses, use the default list + if not bind_addresses: + if listener['type'] == 'metrics': + # the metrics listener doesn't support IPv6 + bind_addresses.append('0.0.0.0') + else: + bind_addresses.extend(DEFAULT_BIND_ADDRESSES) + + self.listeners.append(listener) + + if not self.web_client_location: + _warn_if_webclient_configured(self.listeners) self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None)) bind_port = config.get("bind_port") if bind_port: + if config.get("no_tls", False): + raise ConfigError("no_tls is incompatible with bind_port") + self.listeners = [] bind_host = config.get("bind_host", "") gzip_responses = config.get("gzip_responses", True) - names = ["client", "webclient"] if self.web_client else ["client"] - self.listeners.append({ "port": bind_port, "bind_addresses": [bind_host], @@ -139,7 +180,7 @@ class ServerConfig(Config): "type": "http", "resources": [ { - "names": names, + "names": ["client"], "compress": gzip_responses, }, { @@ -158,7 +199,7 @@ class ServerConfig(Config): "type": "http", "resources": [ { - "names": names, + "names": ["client"], "compress": gzip_responses, }, { @@ -174,6 +215,7 @@ class ServerConfig(Config): "port": manhole, "bind_addresses": ["127.0.0.1"], "type": "manhole", + "tls": False, }) metrics_port = config.get("metrics_port") @@ -197,7 +239,12 @@ class ServerConfig(Config): ] }) - def default_config(self, server_name, **kwargs): + _check_resource_config(self.listeners) + + def has_tls_listener(self): + return any(l["tls"] 
for l in self.listeners) + + def default_config(self, server_name, data_dir_path, **kwargs): _, bind_port = parse_and_validate_server_name(server_name) if bind_port is not None: unsecure_port = bind_port - 400 @@ -205,7 +252,7 @@ class ServerConfig(Config): bind_port = 8448 unsecure_port = 8008 - pid_file = self.abspath("homeserver.pid") + pid_file = os.path.join(data_dir_path, "homeserver.pid") return """\ ## Server ## @@ -239,19 +286,20 @@ class ServerConfig(Config): # # This setting requires the affinity package to be installed! # - # cpu_affinity: 0xFFFFFFFF - - # Whether to serve a web client from the HTTP/HTTPS root resource. - web_client: True + #cpu_affinity: 0xFFFFFFFF - # The root directory to server for the above web client. - # If left undefined, synapse will serve the matrix-angular-sdk web client. - # Make sure matrix-angular-sdk is installed with pip if web_client is True - # and web_client_location is undefined - # web_client_location: "/path/to/web/root" + # The path to the web client which will be served at /_matrix/client/ + # if 'webclient' is configured under the 'listeners' configuration. + # + #web_client_location: "/path/to/web/root" - # The public-facing base URL for the client API (not including _matrix/...) - # public_baseurl: https://example.com:8448/ + # The public-facing base URL that clients use to access this HS + # (not including _matrix/...). This is the same URL a user would + # enter into the 'custom HS URL' field on their client. If you + # use synapse with a reverse proxy, this should be the URL to reach + # synapse via the proxy. 
+ # + #public_baseurl: https://example.com/ # Set the soft limit on the number of file descriptors synapse can use # Zero is used to indicate synapse should set the soft limit to the @@ -262,15 +310,25 @@ class ServerConfig(Config): use_presence: true # The GC threshold parameters to pass to `gc.set_threshold`, if defined - # gc_thresholds: [700, 10, 10] + # + #gc_thresholds: [700, 10, 10] # Set the limit on the returned events in the timeline in the get # and sync operations. The default value is -1, means no upper limit. - # filter_timeline_limit: 5000 + # + #filter_timeline_limit: 5000 # Whether room invites to users on this server should be blocked # (except those sent by local server admins). The default is False. - # block_non_admin_invites: True + # + #block_non_admin_invites: True + + # Room searching + # + # If disabled, new messages will not be indexed for searching and users + # will receive errors when searching for messages. Defaults to enabled. + # + #enable_search: false # Restrict federation to the following whitelist of domains. # N.B. we recommend also firewalling your federation listener to limit @@ -278,107 +336,145 @@ class ServerConfig(Config): # purely on this application-layer restriction. If not specified, the # default is to whitelist everything. # - # federation_domain_whitelist: + #federation_domain_whitelist: # - lon.example.com # - nyc.example.com # - syd.example.com # List of ports that Synapse should listen on, their purpose and their # configuration. + # + # Options for each listener include: + # + # port: the TCP port to bind to + # + # bind_addresses: a list of local addresses to listen on. The default is + # 'all local interfaces'. + # + # type: the type of listener. Normally 'http', but other valid options are: + # 'manhole' (see docs/manhole.md), + # 'metrics' (see docs/metrics-howto.rst), + # 'replication' (see docs/workers.rst). + # + # tls: set to true to enable TLS for this listener. 
Will use the TLS + # key/cert specified in tls_private_key_path / tls_certificate_path. + # + # x_forwarded: Only valid for an 'http' listener. Set to true to use the + # X-Forwarded-For header as the client IP. Useful when Synapse is + # behind a reverse-proxy. + # + # resources: Only valid for an 'http' listener. A list of resources to host + # on this port. Options for each resource are: + # + # names: a list of names of HTTP resources. See below for a list of + # valid resource names. + # + # compress: set to true to enable HTTP compression for this resource. + # + # additional_resources: Only valid for an 'http' listener. A map of + # additional endpoints which should be loaded via dynamic modules. + # + # Valid resource names are: + # + # client: the client-server API (/_matrix/client). Also implies 'media' and + # 'static'. + # + # consent: user consent forms (/_matrix/consent). See + # docs/consent_tracking.md. + # + # federation: the server-server API (/_matrix/federation). Also implies + # 'media', 'keys', 'openid' + # + # keys: the key discovery API (/_matrix/keys). + # + # media: the media API (/_matrix/media). + # + # metrics: the metrics interface. See docs/metrics-howto.rst. + # + # openid: OpenID authentication. + # + # replication: the HTTP replication API (/_synapse/replication). See + # docs/workers.rst. + # + # static: static resources under synapse/static (/_matrix/static). (Mostly + # useful for 'fallback authentication'.) + # + # webclient: A web client. Requires web_client_location to be set. + # listeners: - # Main HTTPS listener - # For when matrix traffic is sent directly to synapse. - - - # The port to listen for HTTPS requests on. - port: %(bind_port)s - - # Local addresses to listen on. - # On Linux and Mac OS, `::` will listen on all IPv4 and IPv6 - # addresses by default. For most other OSes, this will only listen - # on IPv6. - bind_addresses: - - '::' - - '0.0.0.0' - - # This is a 'http' listener, allows us to specify 'resources'. 
- type: http - - tls: true - - # Use the X-Forwarded-For (XFF) header as the client IP and not the - # actual client IP. - x_forwarded: false - - # List of HTTP resources to serve on this listener. - resources: - - - # List of resources to host on this listener. - names: - - client # The client-server APIs, both v1 and v2 - - webclient # The bundled webclient. - - # Should synapse compress HTTP responses to clients that support it? - # This should be disabled if running synapse behind a load balancer - # that can do automatic compression. - compress: true - - - names: [federation] # Federation APIs - compress: false - - # optional list of additional endpoints which can be loaded via - # dynamic modules - # additional_resources: - # "/_matrix/my/custom/endpoint": - # module: my_module.CustomRequestHandler - # config: {} - - # Unsecure HTTP listener, - # For when matrix traffic passes through loadbalancer that unwraps TLS. + # TLS-enabled listener: for when matrix traffic is sent directly to synapse. + # + # Disabled by default. To enable it, uncomment the following. (Note that you + # will also need to give Synapse a TLS key and certificate: see the TLS section + # below.) + # + #- port: %(bind_port)s + # type: http + # tls: true + # resources: + # - names: [client, federation] + + # Unsecure HTTP listener: for when matrix traffic passes through a reverse proxy + # that unwraps TLS. + # + # If you plan to use a reverse proxy, please see + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. 
+ # - port: %(unsecure_port)s tls: false - bind_addresses: ['::', '0.0.0.0'] + bind_addresses: ['::1', '127.0.0.1'] type: http - - x_forwarded: false + x_forwarded: true resources: - - names: [client, webclient] - compress: true - - names: [federation] + - names: [client, federation] compress: false + # example additional_resources: + # + #additional_resources: + # "/_matrix/my/custom/endpoint": + # module: my_module.CustomRequestHandler + # config: {} + # Turn on the twisted ssh manhole service on localhost on the given # port. - # - port: 9000 - # bind_addresses: ['::1', '127.0.0.1'] - # type: manhole + # + #- port: 9000 + # bind_addresses: ['::1', '127.0.0.1'] + # type: manhole - # Homeserver blocking - # - # How to reach the server admin, used in ResourceLimitError - # admin_contact: 'mailto:admin@server.com' - # - # Global block config - # - # hs_disabled: False - # hs_disabled_message: 'Human readable reason for why the HS is blocked' - # hs_disabled_limit_type: 'error code(str), to help clients decode reason' - # - # Monthly Active User Blocking - # - # Enables monthly active user checking - # limit_usage_by_mau: False - # max_mau_value: 50 - # mau_trial_days: 2 - # - # Sometimes the server admin will want to ensure certain accounts are - # never blocked by mau checking. These accounts are specified here. 
- # - # mau_limit_reserved_threepids: - # - medium: 'email' - # address: 'reserved_user@example.com' + ## Homeserver blocking ## + # How to reach the server admin, used in ResourceLimitError + # + #admin_contact: 'mailto:admin@server.com' + + # Global blocking + # + #hs_disabled: False + #hs_disabled_message: 'Human readable reason for why the HS is blocked' + #hs_disabled_limit_type: 'error code(str), to help clients decode reason' + + # Monthly Active User Blocking + # + #limit_usage_by_mau: False + #max_mau_value: 50 + #mau_trial_days: 2 + + # If enabled, the metrics for the number of monthly active users will + # be populated, however no one will be limited. If limit_usage_by_mau + # is true, this is implied to be true. + # + #mau_stats_only: False + + # Sometimes the server admin will want to ensure certain accounts are + # never blocked by mau checking. These accounts are specified here. + # + #mau_limit_reserved_threepids: + # - medium: 'email' + # address: 'reserved_user@example.com' """ % locals() def read_arguments(self, args): @@ -404,19 +500,18 @@ class ServerConfig(Config): " service on the given port.") -def is_threepid_reserved(config, threepid): +def is_threepid_reserved(reserved_threepids, threepid): """Check the threepid against the reserved threepid config Args: - config(ServerConfig) - to access server config attributes + reserved_threepids([dict]) - list of reserved threepids threepid(dict) - The threepid to test for Returns: boolean Is the threepid undertest reserved_user """ - for tp in config.mau_limits_reserved_threepids: - if (threepid['medium'] == tp['medium'] - and threepid['address'] == tp['address']): + for tp in reserved_threepids: + if (threepid['medium'] == tp['medium'] and threepid['address'] == tp['address']): return True return False @@ -436,3 +531,53 @@ def read_gc_thresholds(thresholds): raise ConfigError( "Value of `gc_threshold` must be a list of three integers if set" ) + + +NO_MORE_WEB_CLIENT_WARNING = """ +Synapse no 
longer includes a web client. To enable a web client, configure +web_client_location. To remove this warning, remove 'webclient' from the 'listeners' +configuration. +""" + + +def _warn_if_webclient_configured(listeners): + for listener in listeners: + for res in listener.get("resources", []): + for name in res.get("names", []): + if name == 'webclient': + logger.warning(NO_MORE_WEB_CLIENT_WARNING) + return + + +KNOWN_RESOURCES = ( + 'client', + 'consent', + 'federation', + 'keys', + 'media', + 'metrics', + 'openid', + 'replication', + 'static', + 'webclient', +) + + +def _check_resource_config(listeners): + resource_names = set( + res_name + for listener in listeners + for res in listener.get("resources", []) + for res_name in res.get("names", []) + ) + + for resource in resource_names: + if resource not in KNOWN_RESOURCES: + raise ConfigError( + "Unknown listener resource '%s'" % (resource, ) + ) + if resource == "consent": + try: + check_requirements('resources.consent') + except DependencyException as e: + raise ConfigError(e.message) diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 3c39850ac6..529dc0a617 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -30,11 +30,11 @@ DEFAULT_CONFIG = """\ # It's also possible to override the room name, the display name of the # "notices" user, and the avatar for the user. 
# -# server_notices: -# system_mxid_localpart: notices -# system_mxid_display_name: "Server Notices" -# system_mxid_avatar_url: "mxc://server.com/oumMVlgDnLYFaPVkExemNVVZ" -# room_name: "Server Notices" +#server_notices: +# system_mxid_localpart: notices +# system_mxid_display_name: "Server Notices" +# system_mxid_avatar_url: "mxc://server.com/oumMVlgDnLYFaPVkExemNVVZ" +# room_name: "Server Notices" """ diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index 3fec42bdb0..1502e9faba 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -28,8 +28,8 @@ class SpamCheckerConfig(Config): def default_config(self, **kwargs): return """\ - # spam_checker: - # module: "my_custom_project.SuperSpamChecker" - # config: - # example_option: 'things' + #spam_checker: + # module: "my_custom_project.SuperSpamChecker" + # config: + # example_option: 'things' """ diff --git a/synapse/config/tls.py b/synapse/config/tls.py index fef1ea99cb..40045de7ac 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -13,51 +13,62 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import os -import subprocess +import warnings +from datetime import datetime from hashlib import sha256 +import six + from unpaddedbase64 import encode_base64 from OpenSSL import crypto -from ._base import Config +from synapse.config._base import Config, ConfigError -GENERATE_DH_PARAMS = False +logger = logging.getLogger(__name__) class TlsConfig(Config): def read_config(self, config): - self.tls_certificate = self.read_tls_certificate( - config.get("tls_certificate_path") - ) - self.tls_certificate_file = config.get("tls_certificate_path") - self.no_tls = config.get("no_tls", False) + acme_config = config.get("acme", None) + if acme_config is None: + acme_config = {} - if self.no_tls: - self.tls_private_key = None - else: - self.tls_private_key = self.read_tls_private_key( - config.get("tls_private_key_path") - ) + self.acme_enabled = acme_config.get("enabled", False) - self.tls_dh_params_path = self.check_file( - config.get("tls_dh_params_path"), "tls_dh_params" - ) + # hyperlink complains on py2 if this is not a Unicode + self.acme_url = six.text_type(acme_config.get( + "url", u"https://acme-v01.api.letsencrypt.org/directory" + )) + self.acme_port = acme_config.get("port", 80) + self.acme_bind_addresses = acme_config.get("bind_addresses", ['::', '0.0.0.0']) + self.acme_reprovision_threshold = acme_config.get("reprovision_threshold", 30) + self.acme_domain = acme_config.get("domain", config.get("server_name")) - self.tls_fingerprints = config["tls_fingerprints"] + self.tls_certificate_file = self.abspath(config.get("tls_certificate_path")) + self.tls_private_key_file = self.abspath(config.get("tls_private_key_path")) - # Check that our own certificate is included in the list of fingerprints - # and include it if it is not. 
- x509_certificate_bytes = crypto.dump_certificate( - crypto.FILETYPE_ASN1, - self.tls_certificate - ) - sha256_fingerprint = encode_base64(sha256(x509_certificate_bytes).digest()) - sha256_fingerprints = set(f["sha256"] for f in self.tls_fingerprints) - if sha256_fingerprint not in sha256_fingerprints: - self.tls_fingerprints.append({u"sha256": sha256_fingerprint}) + if self.has_tls_listener(): + if not self.tls_certificate_file: + raise ConfigError( + "tls_certificate_path must be specified if TLS-enabled listeners are " + "configured." + ) + if not self.tls_private_key_file: + raise ConfigError( + "tls_private_key_path must be specified if TLS-enabled listeners are " + "configured." + ) + + self._original_tls_fingerprints = config.get("tls_fingerprints", []) + + if self._original_tls_fingerprints is None: + self._original_tls_fingerprints = [] + + self.tls_fingerprints = list(self._original_tls_fingerprints) # This config option applies to non-federation HTTP clients # (e.g. for talking to recaptcha, identity servers, and such) @@ -67,29 +78,176 @@ class TlsConfig(Config): "use_insecure_ssl_client_just_for_testing_do_not_use" ) + self.tls_certificate = None + self.tls_private_key = None + + def is_disk_cert_valid(self, allow_self_signed=True): + """ + Is the certificate we have on disk valid, and if so, for how long? + + Args: + allow_self_signed (bool): Should we allow the certificate we + read to be self signed? + + Returns: + int: Days remaining of certificate validity. + None: No certificate exists. 
+ """ + if not os.path.exists(self.tls_certificate_file): + return None + + try: + with open(self.tls_certificate_file, 'rb') as f: + cert_pem = f.read() + except Exception: + logger.exception("Failed to read existing certificate off disk!") + raise + + try: + tls_certificate = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + except Exception: + logger.exception("Failed to parse existing certificate off disk!") + raise + + if not allow_self_signed: + if tls_certificate.get_subject() == tls_certificate.get_issuer(): + raise ValueError( + "TLS Certificate is self signed, and this is not permitted" + ) + + # YYYYMMDDhhmmssZ -- in UTC + expires_on = datetime.strptime( + tls_certificate.get_notAfter().decode('ascii'), "%Y%m%d%H%M%SZ" + ) + now = datetime.utcnow() + days_remaining = (expires_on - now).days + return days_remaining + + def read_certificate_from_disk(self, require_cert_and_key): + """ + Read the certificates and private key from disk. + + Args: + require_cert_and_key (bool): set to True to throw an error if the certificate + and key file are not given + """ + if require_cert_and_key: + self.tls_private_key = self.read_tls_private_key() + self.tls_certificate = self.read_tls_certificate() + elif self.tls_certificate_file: + # we only need the certificate for the tls_fingerprints. Reload it if we + # can, but it's not a fatal error if we can't. + try: + self.tls_certificate = self.read_tls_certificate() + except Exception as e: + logger.info( + "Unable to read TLS certificate (%s). Ignoring as no " + "tls listeners enabled.", e, + ) + + self.tls_fingerprints = list(self._original_tls_fingerprints) + + if self.tls_certificate: + # Check that our own certificate is included in the list of fingerprints + # and include it if it is not. 
+ x509_certificate_bytes = crypto.dump_certificate( + crypto.FILETYPE_ASN1, self.tls_certificate + ) + sha256_fingerprint = encode_base64(sha256(x509_certificate_bytes).digest()) + sha256_fingerprints = set(f["sha256"] for f in self.tls_fingerprints) + if sha256_fingerprint not in sha256_fingerprints: + self.tls_fingerprints.append({u"sha256": sha256_fingerprint}) + def default_config(self, config_dir_path, server_name, **kwargs): base_key_name = os.path.join(config_dir_path, server_name) tls_certificate_path = base_key_name + ".tls.crt" tls_private_key_path = base_key_name + ".tls.key" - tls_dh_params_path = base_key_name + ".tls.dh" - return """\ - # PEM encoded X509 certificate for TLS. - # You can replace the self-signed certificate that synapse - # autogenerates on launch with your own SSL certificate + key pair - # if you like. Any required intermediary certificates can be - # appended after the primary certificate in hierarchical order. - tls_certificate_path: "%(tls_certificate_path)s" + # this is to avoid the max line length. Sorrynotsorry + proxypassline = ( + 'ProxyPass /.well-known/acme-challenge ' + 'http://localhost:8009/.well-known/acme-challenge' + ) + + return ( + """\ + ## TLS ## + + # PEM-encoded X509 certificate for TLS. + # This certificate, as of Synapse 1.0, will need to be a valid and verifiable + # certificate, signed by a recognised Certificate Authority. + # + # See 'ACME support' below to enable auto-provisioning this certificate via + # Let's Encrypt. + # + #tls_certificate_path: "%(tls_certificate_path)s" + + # PEM-encoded private key for TLS + # + #tls_private_key_path: "%(tls_private_key_path)s" - # PEM encoded private key for TLS - tls_private_key_path: "%(tls_private_key_path)s" + # ACME support: This will configure Synapse to request a valid TLS certificate + # for your configured `server_name` via Let's Encrypt. 
+ # + # Note that provisioning a certificate in this way requires port 80 to be + # routed to Synapse so that it can complete the http-01 ACME challenge. + # By default, if you enable ACME support, Synapse will attempt to listen on + # port 80 for incoming http-01 challenges - however, this will likely fail + # with 'Permission denied' or a similar error. + # + # There are a couple of potential solutions to this: + # + # * If you already have an Apache, Nginx, or similar listening on port 80, + # you can configure Synapse to use an alternate port, and have your web + # server forward the requests. For example, assuming you set 'port: 8009' + # below, on Apache, you would write: + # + # %(proxypassline)s + # + # * Alternatively, you can use something like `authbind` to give Synapse + # permission to listen on port 80. + # + acme: + # ACME support is disabled by default. Uncomment the following line + # (and tls_certificate_path and tls_private_key_path above) to enable it. + # + #enabled: true - # PEM dh parameters for ephemeral keys - tls_dh_params_path: "%(tls_dh_params_path)s" + # Endpoint to use to request certificates. If you only want to test, + # use Let's Encrypt's staging url: + # https://acme-staging.api.letsencrypt.org/directory + # + #url: https://acme-v01.api.letsencrypt.org/directory - # Don't bind to the https port - no_tls: False + # Port number to listen on for the HTTP-01 challenge. Change this if + # you are forwarding connections through Apache/Nginx/etc. + # + #port: 80 + + # Local addresses to listen on for incoming connections. + # Again, you may want to change this if you are forwarding connections + # through Apache/Nginx/etc. + # + #bind_addresses: ['::', '0.0.0.0'] + + # How many days remaining on a certificate before it is renewed. + # + #reprovision_threshold: 30 + + # The domain that the certificate should be for. 
Normally this + # should be the same as your Matrix domain (i.e., 'server_name'), but, + # by putting a file at 'https://<server_name>/.well-known/matrix/server', + # you can delegate incoming traffic to another server. If you do that, + # you should give the target of the delegation here. + # + # For example: if your 'server_name' is 'example.com', but + # 'https://example.com/.well-known/matrix/server' delegates to + # 'matrix.example.com', you should put 'matrix.example.com' here. + # + # If not set, defaults to your 'server_name'. + # + #domain: matrix.example.com # List of allowed TLS fingerprints for this server to publish along # with the signing keys for this server. Other matrix servers that @@ -116,80 +274,44 @@ class TlsConfig(Config): # openssl x509 -outform DER | openssl sha256 -binary | base64 | tr -d '=' # or by checking matrix.org/federationtester/api/report?server_name=$host # - tls_fingerprints: [] - # tls_fingerprints: [{"sha256": "<base64_encoded_sha256_fingerprint>"}] - """ % locals() + #tls_fingerprints: [{"sha256": "<base64_encoded_sha256_fingerprint>"}] - def read_tls_certificate(self, cert_path): - cert_pem = self.read_file(cert_path, "tls_certificate") - return crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + """ + % locals() + ) - def read_tls_private_key(self, private_key_path): - private_key_pem = self.read_file(private_key_path, "tls_private_key") - return crypto.load_privatekey(crypto.FILETYPE_PEM, private_key_pem) + def read_tls_certificate(self): + """Reads the TLS certificate from the configured file, and returns it - def generate_files(self, config): - tls_certificate_path = config["tls_certificate_path"] - tls_private_key_path = config["tls_private_key_path"] - tls_dh_params_path = config["tls_dh_params_path"] - - if not self.path_exists(tls_private_key_path): - with open(tls_private_key_path, "wb") as private_key_file: - tls_private_key = crypto.PKey() - tls_private_key.generate_key(crypto.TYPE_RSA, 2048) - 
private_key_pem = crypto.dump_privatekey( - crypto.FILETYPE_PEM, tls_private_key - ) - private_key_file.write(private_key_pem) - else: - with open(tls_private_key_path) as private_key_file: - private_key_pem = private_key_file.read() - tls_private_key = crypto.load_privatekey( - crypto.FILETYPE_PEM, private_key_pem + Also checks if it is self-signed, and warns if so + + Returns: + OpenSSL.crypto.X509: the certificate + """ + cert_path = self.tls_certificate_file + logger.info("Loading TLS certificate from %s", cert_path) + cert_pem = self.read_file(cert_path, "tls_certificate_path") + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + + # Check if it is self-signed, and issue a warning if so. + if cert.get_issuer() == cert.get_subject(): + warnings.warn( + ( + "Self-signed TLS certificates will not be accepted by Synapse 1.0. " + "Please either provide a valid certificate, or use Synapse's ACME " + "support to provision one." ) + ) - if not self.path_exists(tls_certificate_path): - with open(tls_certificate_path, "wb") as certificate_file: - cert = crypto.X509() - subject = cert.get_subject() - subject.CN = config["server_name"] - - cert.set_serial_number(1000) - cert.gmtime_adj_notBefore(0) - cert.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60) - cert.set_issuer(cert.get_subject()) - cert.set_pubkey(tls_private_key) - - cert.sign(tls_private_key, 'sha256') - - cert_pem = crypto.dump_certificate(crypto.FILETYPE_PEM, cert) - - certificate_file.write(cert_pem) - - if not self.path_exists(tls_dh_params_path): - if GENERATE_DH_PARAMS: - subprocess.check_call([ - "openssl", "dhparam", - "-outform", "PEM", - "-out", tls_dh_params_path, - "2048" - ]) - else: - with open(tls_dh_params_path, "w") as dh_params_file: - dh_params_file.write( - "2048-bit DH parameters taken from rfc3526\n" - "-----BEGIN DH PARAMETERS-----\n" - "MIIBCAKCAQEA///////////JD9qiIWjC" - "NMTGYouA3BzRKQJOCIpnzHQCC76mOxOb\n" - "IlFKCHmONATd75UZs806QxswKwpt8l8U" - 
"N0/hNW1tUcJF5IW1dmJefsb0TELppjft\n" - "awv/XLb0Brft7jhr+1qJn6WunyQRfEsf" - "5kkoZlHs5Fs9wgB8uKFjvwWY2kg2HFXT\n" - "mmkWP6j9JM9fg2VdI9yjrZYcYvNWIIVS" - "u57VKQdwlpZtZww1Tkq8mATxdGwIyhgh\n" - "fDKQXkYuNs474553LBgOhgObJ4Oi7Aei" - "j7XFXfBvTFLJ3ivL9pVYFxg5lUl86pVq\n" - "5RXSJhiY+gUQFXKOWoqsqmj/////////" - "/wIBAg==\n" - "-----END DH PARAMETERS-----\n" - ) + return cert + + def read_tls_private_key(self): + """Reads the TLS private key from the configured file, and returns it + + Returns: + OpenSSL.crypto.PKey: the private key + """ + private_key_path = self.tls_private_key_file + logger.info("Loading TLS key from %s", private_key_path) + private_key_pem = self.read_file(private_key_path, "tls_private_key_path") + return crypto.load_privatekey(crypto.FILETYPE_PEM, private_key_pem) diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 38e8947843..fab3a7d1c8 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -40,5 +40,5 @@ class UserDirectoryConfig(Config): # on your database to tell it to rebuild the user_directory search indexes. # #user_directory: - # search_all_users: false + # search_all_users: false """ diff --git a/synapse/config/voip.py b/synapse/config/voip.py index d07bd24ffd..257f7c86e7 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -27,20 +27,24 @@ class VoipConfig(Config): def default_config(self, **kwargs): return """\ - ## Turn ## + ## TURN ## # The public URIs of the TURN server to give to clients + # #turn_uris: [] # The shared secret used to compute passwords for the TURN server + # #turn_shared_secret: "YOUR_SHARED_SECRET" # The Username and password if the TURN server needs them and # does not use a token + # #turn_username: "TURNSERVER_USERNAME" #turn_password: "TURNSERVER_PASSWORD" # How long generated TURN credentials last + # turn_user_lifetime: "1h" # Whether guests should be allowed to use the TURN server. 
@@ -48,5 +52,6 @@ class VoipConfig(Config): # However, it does introduce a slight security risk as it allows users to # connect to arbitrary endpoints without having first signed up for a # valid account (e.g. by passing a CAPTCHA). + # turn_allow_guests: True """ diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 02b76dfcfb..49cbc7098f 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import logging from zope.interface import implementer from OpenSSL import SSL, crypto from twisted.internet._sslverify import _defaultCurveName +from twisted.internet.abstract import isIPAddress, isIPv6Address from twisted.internet.interfaces import IOpenSSLClientConnectionCreator from twisted.internet.ssl import CertificateOptions, ContextFactory from twisted.python.failure import Failure @@ -42,12 +45,12 @@ class ServerContextFactory(ContextFactory): logger.exception("Failed to enable elliptic curve for TLS") context.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) context.use_certificate_chain_file(config.tls_certificate_file) + context.use_privatekey(config.tls_private_key) - if not config.no_tls: - context.use_privatekey(config.tls_private_key) - - context.load_tmp_dh(config.tls_dh_params_path) - context.set_cipher_list("!ADH:HIGH+kEDH:!AECDH:HIGH+kEECDH") + # https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ + context.set_cipher_list( + "ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES256:ECDH+AES128:!aNULL:!SHA1" + ) def getContext(self): return self._context @@ -96,11 +99,15 @@ class 
ClientTLSOptions(object): def __init__(self, hostname, ctx): self._ctx = ctx - self._hostname = hostname - self._hostnameBytes = _idnaBytes(hostname) - ctx.set_info_callback( - _tolerateErrors(self._identityVerifyingInfoCallback) - ) + + if isIPAddress(hostname) or isIPv6Address(hostname): + self._hostnameBytes = hostname.encode('ascii') + self._sendSNI = False + else: + self._hostnameBytes = _idnaBytes(hostname) + self._sendSNI = True + + ctx.set_info_callback(_tolerateErrors(self._identityVerifyingInfoCallback)) def clientConnectionForTLS(self, tlsProtocol): context = self._ctx @@ -109,7 +116,9 @@ class ClientTLSOptions(object): return connection def _identityVerifyingInfoCallback(self, connection, where, ret): - if where & SSL.SSL_CB_HANDSHAKE_START: + # Literal IPv4 and IPv6 addresses are not permitted + # as host names according to the RFCs + if where & SSL.SSL_CB_HANDSHAKE_START and self._sendSNI: connection.set_tlsext_host_name(self._hostnameBytes) @@ -119,10 +128,8 @@ class ClientTLSOptionsFactory(object): def __init__(self, config): # We don't use config options yet - pass + self._options = CertificateOptions(verify=False) def get_options(self, host): - return ClientTLSOptions( - host, - CertificateOptions(verify=False).getContext() - ) + # Use _makeContext so that we get a fresh OpenSSL CTX each time. 
+ return ClientTLSOptions(host, self._options._makeContext()) diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 8774b28967..1dfa727fcf 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -23,14 +23,14 @@ from signedjson.sign import sign_json from unpaddedbase64 import decode_base64, encode_base64 from synapse.api.errors import Codes, SynapseError -from synapse.events.utils import prune_event +from synapse.events.utils import prune_event, prune_event_dict logger = logging.getLogger(__name__) def check_event_content_hash(event, hash_algorithm=hashlib.sha256): """Check whether the hash for this PDU matches the contents""" - name, expected_hash = compute_content_hash(event, hash_algorithm) + name, expected_hash = compute_content_hash(event.get_pdu_json(), hash_algorithm) logger.debug("Expecting hash: %s", encode_base64(expected_hash)) # some malformed events lack a 'hashes'. Protect against it being missing @@ -59,35 +59,70 @@ def check_event_content_hash(event, hash_algorithm=hashlib.sha256): return message_hash_bytes == expected_hash -def compute_content_hash(event, hash_algorithm): - event_json = event.get_pdu_json() - event_json.pop("age_ts", None) - event_json.pop("unsigned", None) - event_json.pop("signatures", None) - event_json.pop("hashes", None) - event_json.pop("outlier", None) - event_json.pop("destinations", None) +def compute_content_hash(event_dict, hash_algorithm): + """Compute the content hash of an event, which is the hash of the + unredacted event. - event_json_bytes = encode_canonical_json(event_json) + Args: + event_dict (dict): The unredacted event as a dict + hash_algorithm: A hasher from `hashlib`, e.g. hashlib.sha256, to use + to hash the event + + Returns: + tuple[str, bytes]: A tuple of the name of hash and the hash as raw + bytes. 
+ """ + event_dict = dict(event_dict) + event_dict.pop("age_ts", None) + event_dict.pop("unsigned", None) + event_dict.pop("signatures", None) + event_dict.pop("hashes", None) + event_dict.pop("outlier", None) + event_dict.pop("destinations", None) + + event_json_bytes = encode_canonical_json(event_dict) hashed = hash_algorithm(event_json_bytes) return (hashed.name, hashed.digest()) def compute_event_reference_hash(event, hash_algorithm=hashlib.sha256): + """Computes the event reference hash. This is the hash of the redacted + event. + + Args: + event (FrozenEvent) + hash_algorithm: A hasher from `hashlib`, e.g. hashlib.sha256, to use + to hash the event + + Returns: + tuple[str, bytes]: A tuple of the name of hash and the hash as raw + bytes. + """ tmp_event = prune_event(event) - event_json = tmp_event.get_pdu_json() - event_json.pop("signatures", None) - event_json.pop("age_ts", None) - event_json.pop("unsigned", None) - event_json_bytes = encode_canonical_json(event_json) + event_dict = tmp_event.get_pdu_json() + event_dict.pop("signatures", None) + event_dict.pop("age_ts", None) + event_dict.pop("unsigned", None) + event_json_bytes = encode_canonical_json(event_dict) hashed = hash_algorithm(event_json_bytes) return (hashed.name, hashed.digest()) -def compute_event_signature(event, signature_name, signing_key): - tmp_event = prune_event(event) - redact_json = tmp_event.get_pdu_json() +def compute_event_signature(event_dict, signature_name, signing_key): + """Compute the signature of the event for the given name and key. + + Args: + event_dict (dict): The event as a dict + signature_name (str): The name of the entity signing the event + (typically the server's hostname). + signing_key (syutil.crypto.SigningKey): The key to sign with + + Returns: + dict[str, dict[str, str]]: Returns a dictionary in the same format of + an event's signatures field. 
+ """ + redact_json = prune_event_dict(event_dict) redact_json.pop("age_ts", None) redact_json.pop("unsigned", None) logger.debug("Signing event: %s", encode_canonical_json(redact_json)) @@ -96,25 +131,25 @@ def compute_event_signature(event, signature_name, signing_key): return redact_json["signatures"] -def add_hashes_and_signatures(event, signature_name, signing_key, +def add_hashes_and_signatures(event_dict, signature_name, signing_key, hash_algorithm=hashlib.sha256): - # if hasattr(event, "old_state_events"): - # state_json_bytes = encode_canonical_json( - # [e.event_id for e in event.old_state_events.values()] - # ) - # hashed = hash_algorithm(state_json_bytes) - # event.state_hash = { - # hashed.name: encode_base64(hashed.digest()) - # } - - name, digest = compute_content_hash(event, hash_algorithm=hash_algorithm) - - if not hasattr(event, "hashes"): - event.hashes = {} - event.hashes[name] = encode_base64(digest) - - event.signatures = compute_event_signature( - event, + """Add content hash and sign the event + + Args: + event_dict (dict): The event to add hashes to and sign + signature_name (str): The name of the entity signing the event + (typically the server's hostname). + signing_key (syutil.crypto.SigningKey): The key to sign with + hash_algorithm: A hasher from `hashlib`, e.g. 
hashlib.sha256, to use + to hash the event + """ + + name, digest = compute_content_hash(event_dict, hash_algorithm=hash_algorithm) + + event_dict.setdefault("hashes", {})[name] = encode_base64(digest) + + event_dict["signatures"] = compute_event_signature( + event_dict, signature_name=signature_name, signing_key=signing_key, ) diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py deleted file mode 100644 index 080c81f14b..0000000000 --- a/synapse/crypto/keyclient.py +++ /dev/null @@ -1,147 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging - -from canonicaljson import json - -from twisted.internet import defer, reactor -from twisted.internet.error import ConnectError -from twisted.internet.protocol import Factory -from twisted.names.error import DomainError -from twisted.web.http import HTTPClient - -from synapse.http.endpoint import matrix_federation_endpoint -from synapse.util import logcontext - -logger = logging.getLogger(__name__) - -KEY_API_V1 = b"/_matrix/key/v1/" - - -@defer.inlineCallbacks -def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1): - """Fetch the keys for a remote server.""" - - factory = SynapseKeyClientFactory() - factory.path = path - factory.host = server_name - endpoint = matrix_federation_endpoint( - reactor, server_name, tls_client_options_factory, timeout=30 - ) - - for i in range(5): - try: - with logcontext.PreserveLoggingContext(): - protocol = yield endpoint.connect(factory) - server_response, server_certificate = yield protocol.remote_key - defer.returnValue((server_response, server_certificate)) - except SynapseKeyClientError as e: - logger.warn("Error getting key for %r: %s", server_name, e) - if e.status.startswith(b"4"): - # Don't retry for 4xx responses. 
- raise IOError("Cannot get key for %r" % server_name) - except (ConnectError, DomainError) as e: - logger.warn("Error getting key for %r: %s", server_name, e) - except Exception: - logger.exception("Error getting key for %r", server_name) - raise IOError("Cannot get key for %r" % server_name) - - -class SynapseKeyClientError(Exception): - """The key wasn't retrieved from the remote server.""" - status = None - pass - - -class SynapseKeyClientProtocol(HTTPClient): - """Low level HTTPS client which retrieves an application/json response from - the server and extracts the X.509 certificate for the remote peer from the - SSL connection.""" - - timeout = 30 - - def __init__(self): - self.remote_key = defer.Deferred() - self.host = None - self._peer = None - - def connectionMade(self): - self._peer = self.transport.getPeer() - logger.debug("Connected to %s", self._peer) - - if not isinstance(self.path, bytes): - self.path = self.path.encode('ascii') - - if not isinstance(self.host, bytes): - self.host = self.host.encode('ascii') - - self.sendCommand(b"GET", self.path) - if self.host: - self.sendHeader(b"Host", self.host) - self.endHeaders() - self.timer = reactor.callLater( - self.timeout, - self.on_timeout - ) - - def errback(self, error): - if not self.remote_key.called: - self.remote_key.errback(error) - - def callback(self, result): - if not self.remote_key.called: - self.remote_key.callback(result) - - def handleStatus(self, version, status, message): - if status != b"200": - # logger.info("Non-200 response from %s: %s %s", - # self.transport.getHost(), status, message) - error = SynapseKeyClientError( - "Non-200 response %r from %r" % (status, self.host) - ) - error.status = status - self.errback(error) - self.transport.abortConnection() - - def handleResponse(self, response_body_bytes): - try: - json_response = json.loads(response_body_bytes) - except ValueError: - # logger.info("Invalid JSON response from %s", - # self.transport.getHost()) - 
self.transport.abortConnection() - return - - certificate = self.transport.getPeerCertificate() - self.callback((json_response, certificate)) - self.transport.abortConnection() - self.timer.cancel() - - def on_timeout(self): - logger.debug( - "Timeout waiting for response from %s: %s", - self.host, self._peer, - ) - self.errback(IOError("Timeout waiting for response")) - self.transport.abortConnection() - - -class SynapseKeyClientFactory(Factory): - def protocol(self): - protocol = SynapseKeyClientProtocol() - protocol.path = self.path - protocol.host = self.host - return protocol diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index d89f94c219..7474fd515f 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd. +# Copyright 2017, 2018 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import hashlib import logging from collections import namedtuple +from six import raise_from from six.moves import urllib from signedjson.key import ( @@ -32,13 +32,16 @@ from signedjson.sign import ( signature_ids, verify_signed_json, ) -from unpaddedbase64 import decode_base64, encode_base64 +from unpaddedbase64 import decode_base64 -from OpenSSL import crypto from twisted.internet import defer -from synapse.api.errors import Codes, SynapseError -from synapse.crypto.keyclient import fetch_server_key +from synapse.api.errors import ( + Codes, + HttpResponseException, + RequestSendFailed, + SynapseError, +) from synapse.util import logcontext, unwrapFirstError from synapse.util.logcontext import ( LoggingContext, @@ -47,6 +50,7 @@ from synapse.util.logcontext import ( run_in_background, ) from synapse.util.metrics import Measure +from synapse.util.retryutils import NotRetryingDestination logger = logging.getLogger(__name__) @@ -370,13 +374,18 @@ class Keyring(object): server_name_and_key_ids, perspective_name, perspective_keys ) defer.returnValue(result) + except KeyLookupError as e: + logger.warning( + "Key lookup failed from %r: %s", perspective_name, e, + ) except Exception as e: logger.exception( "Unable to get key from %r: %s %s", perspective_name, type(e).__name__, str(e), ) - defer.returnValue({}) + + defer.returnValue({}) results = yield logcontext.make_deferred_yieldable(defer.gatherResults( [ @@ -395,32 +404,13 @@ class Keyring(object): @defer.inlineCallbacks def get_keys_from_server(self, server_name_and_key_ids): - @defer.inlineCallbacks - def get_key(server_name, key_ids): - keys = None - try: - keys = yield self.get_server_verify_key_v2_direct( - server_name, key_ids - ) - except Exception as e: - logger.info( - "Unable to get key %r for %r directly: %s %s", - key_ids, server_name, - type(e).__name__, str(e), - ) - - if not keys: - keys = yield self.get_server_verify_key_v1_direct( - server_name, key_ids - ) - - keys = {server_name: keys} - - 
defer.returnValue(keys) - results = yield logcontext.make_deferred_yieldable(defer.gatherResults( [ - run_in_background(get_key, server_name, key_ids) + run_in_background( + self.get_server_verify_key_v2_direct, + server_name, + key_ids, + ) for server_name, key_ids in server_name_and_key_ids ], consumeErrors=True, @@ -443,21 +433,30 @@ class Keyring(object): # TODO(mark): Set the minimum_valid_until_ts to that needed by # the events being validated or the current time if validating # an incoming request. - query_response = yield self.client.post_json( - destination=perspective_name, - path="/_matrix/key/v2/query", - data={ - u"server_keys": { - server_name: { - key_id: { - u"minimum_valid_until_ts": 0 - } for key_id in key_ids + try: + query_response = yield self.client.post_json( + destination=perspective_name, + path="/_matrix/key/v2/query", + data={ + u"server_keys": { + server_name: { + key_id: { + u"minimum_valid_until_ts": 0 + } for key_id in key_ids + } + for server_name, key_ids in server_names_and_key_ids } - for server_name, key_ids in server_names_and_key_ids - } - }, - long_retries=True, - ) + }, + long_retries=True, + ) + except (NotRetryingDestination, RequestSendFailed) as e: + raise_from( + KeyLookupError("Failed to connect to remote server"), e, + ) + except HttpResponseException as e: + raise_from( + KeyLookupError("Remote server returned an error"), e, + ) keys = {} @@ -524,34 +523,25 @@ class Keyring(object): if requested_key_id in keys: continue - (response, tls_certificate) = yield fetch_server_key( - server_name, self.hs.tls_client_options_factory, - path=("/_matrix/key/v2/server/%s" % ( - urllib.parse.quote(requested_key_id), - )).encode("ascii"), - ) + try: + response = yield self.client.get_json( + destination=server_name, + path="/_matrix/key/v2/server/" + urllib.parse.quote(requested_key_id), + ignore_backoff=True, + ) + except (NotRetryingDestination, RequestSendFailed) as e: + raise_from( + KeyLookupError("Failed to connect to remote 
server"), e, + ) + except HttpResponseException as e: + raise_from( + KeyLookupError("Remote server returned an error"), e, + ) if (u"signatures" not in response or server_name not in response[u"signatures"]): raise KeyLookupError("Key response not signed by remote server") - if "tls_fingerprints" not in response: - raise KeyLookupError("Key response missing TLS fingerprints") - - certificate_bytes = crypto.dump_certificate( - crypto.FILETYPE_ASN1, tls_certificate - ) - sha256_fingerprint = hashlib.sha256(certificate_bytes).digest() - sha256_fingerprint_b64 = encode_base64(sha256_fingerprint) - - response_sha256_fingerprints = set() - for fingerprint in response[u"tls_fingerprints"]: - if u"sha256" in fingerprint: - response_sha256_fingerprints.add(fingerprint[u"sha256"]) - - if sha256_fingerprint_b64 not in response_sha256_fingerprints: - raise KeyLookupError("TLS certificate not allowed by fingerprints") - response_keys = yield self.process_v2_response( from_server=server_name, requested_ids=[requested_key_id], @@ -657,78 +647,6 @@ class Keyring(object): defer.returnValue(results) - @defer.inlineCallbacks - def get_server_verify_key_v1_direct(self, server_name, key_ids): - """Finds a verification key for the server with one of the key ids. - Args: - server_name (str): The name of the server to fetch a key for. - keys_ids (list of str): The key_ids to check for. - """ - - # Try to fetch the key from the remote server. - - (response, tls_certificate) = yield fetch_server_key( - server_name, self.hs.tls_client_options_factory - ) - - # Check the response. 
- - x509_certificate_bytes = crypto.dump_certificate( - crypto.FILETYPE_ASN1, tls_certificate - ) - - if ("signatures" not in response - or server_name not in response["signatures"]): - raise KeyLookupError("Key response not signed by remote server") - - if "tls_certificate" not in response: - raise KeyLookupError("Key response missing TLS certificate") - - tls_certificate_b64 = response["tls_certificate"] - - if encode_base64(x509_certificate_bytes) != tls_certificate_b64: - raise KeyLookupError("TLS certificate doesn't match") - - # Cache the result in the datastore. - - time_now_ms = self.clock.time_msec() - - verify_keys = {} - for key_id, key_base64 in response["verify_keys"].items(): - if is_signing_algorithm_supported(key_id): - key_bytes = decode_base64(key_base64) - verify_key = decode_verify_key_bytes(key_id, key_bytes) - verify_key.time_added = time_now_ms - verify_keys[key_id] = verify_key - - for key_id in response["signatures"][server_name]: - if key_id not in response["verify_keys"]: - raise KeyLookupError( - "Key response must include verification keys for all" - " signatures" - ) - if key_id in verify_keys: - verify_signed_json( - response, - server_name, - verify_keys[key_id] - ) - - yield self.store.store_server_certificate( - server_name, - server_name, - time_now_ms, - tls_certificate, - ) - - yield self.store_keys( - server_name=server_name, - from_server=server_name, - verify_keys=verify_keys, - ) - - defer.returnValue(verify_keys) - def store_keys(self, server_name, from_server, verify_keys): """Store a collection of verify keys for a given server Args: @@ -768,7 +686,7 @@ def _handle_key_deferred(verify_request): try: with PreserveLoggingContext(): _, key_id, verify_key = yield verify_request.deferred - except IOError as e: + except (IOError, RequestSendFailed) as e: logger.warn( "Got IOError when downloading keys for %s: %s %s", server_name, type(e).__name__, str(e), diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 
d4d4474847..8f9e330da5 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -20,17 +20,25 @@ from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json from unpaddedbase64 import decode_base64 -from synapse.api.constants import KNOWN_ROOM_VERSIONS, EventTypes, JoinRules, Membership +from synapse.api.constants import ( + KNOWN_ROOM_VERSIONS, + EventFormatVersions, + EventTypes, + JoinRules, + Membership, + RoomVersions, +) from synapse.api.errors import AuthError, EventSizeError, SynapseError from synapse.types import UserID, get_domain_from_id logger = logging.getLogger(__name__) -def check(event, auth_events, do_sig_check=True, do_size_check=True): +def check(room_version, event, auth_events, do_sig_check=True, do_size_check=True): """ Checks if this event is correctly authed. Args: + room_version (str): the version of the room event: the event being checked. auth_events (dict: event-key -> event): the existing room state. @@ -48,7 +56,6 @@ def check(event, auth_events, do_sig_check=True, do_size_check=True): if do_sig_check: sender_domain = get_domain_from_id(event.sender) - event_id_domain = get_domain_from_id(event.event_id) is_invite_via_3pid = ( event.type == EventTypes.Member @@ -65,9 +72,13 @@ def check(event, auth_events, do_sig_check=True, do_size_check=True): if not is_invite_via_3pid: raise AuthError(403, "Event not signed by sender's server") - # Check the event_id's domain has signed the event - if not event.signatures.get(event_id_domain): - raise AuthError(403, "Event not signed by sending server") + if event.format_version in (EventFormatVersions.V1,): + # Only older room versions have event IDs to check. 
+ event_id_domain = get_domain_from_id(event.event_id) + + # Check the origin domain has signed the event + if not event.signatures.get(event_id_domain): + raise AuthError(403, "Event not signed by sending server") if auth_events is None: # Oh, we don't know what the state of the room was, so we @@ -167,7 +178,7 @@ def check(event, auth_events, do_sig_check=True, do_size_check=True): _check_power_levels(event, auth_events) if event.type == EventTypes.Redaction: - check_redaction(event, auth_events) + check_redaction(room_version, event, auth_events) logger.debug("Allowing! %s", event) @@ -200,11 +211,11 @@ def _is_membership_change_allowed(event, auth_events): membership = event.content["membership"] # Check if this is the room creator joining: - if len(event.prev_events) == 1 and Membership.JOIN == membership: + if len(event.prev_event_ids()) == 1 and Membership.JOIN == membership: # Get room creation event: key = (EventTypes.Create, "", ) create = auth_events.get(key) - if create and event.prev_events[0][0] == create.event_id: + if create and event.prev_event_ids()[0] == create.event_id: if create.content["creator"] == event.state_key: return @@ -421,7 +432,7 @@ def _can_send_event(event, auth_events): return True -def check_redaction(event, auth_events): +def check_redaction(room_version, event, auth_events): """Check whether the event sender is allowed to redact the target event. 
Returns: @@ -441,10 +452,16 @@ def check_redaction(event, auth_events): if user_level >= redact_level: return False - redacter_domain = get_domain_from_id(event.event_id) - redactee_domain = get_domain_from_id(event.redacts) - if redacter_domain == redactee_domain: + if room_version in (RoomVersions.V1, RoomVersions.V2,): + redacter_domain = get_domain_from_id(event.event_id) + redactee_domain = get_domain_from_id(event.redacts) + if redacter_domain == redactee_domain: + return True + elif room_version == RoomVersions.V3: + event.internal_metadata.recheck_redaction = True return True + else: + raise RuntimeError("Unrecognized room version %r" % (room_version,)) raise AuthError( 403, diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 12f1eb0a3e..20c1ab4203 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +19,9 @@ from distutils.util import strtobool import six +from unpaddedbase64 import encode_base64 + +from synapse.api.constants import KNOWN_ROOM_VERSIONS, EventFormatVersions, RoomVersions from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze @@ -41,8 +45,13 @@ class _EventInternalMetadata(object): def is_outlier(self): return getattr(self, "outlier", False) - def is_invite_from_remote(self): - return getattr(self, "invite_from_remote", False) + def is_out_of_band_membership(self): + """Whether this is an out of band membership, like an invite or an invite + rejection. This is needed as those events are marked as outliers, but + they still need to be processed as if they're new events (e.g. updating + invite state in the database, relaying to clients, etc). 
+ """ + return getattr(self, "out_of_band_membership", False) def get_send_on_behalf_of(self): """Whether this server should send the event on behalf of another server. @@ -53,6 +62,21 @@ class _EventInternalMetadata(object): """ return getattr(self, "send_on_behalf_of", None) + def need_to_check_redaction(self): + """Whether the redaction event needs to be rechecked when fetching + from the database. + + Starting in room v3 redaction events are accepted up front, and later + checked to see if the redacter and redactee's domains match. + + If the sender of the redaction event is allowed to redact any event + due to auth rules, then this will always return false. + + Returns: + bool + """ + return getattr(self, "recheck_redaction", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. @@ -159,8 +183,28 @@ class EventBase(object): def keys(self): return six.iterkeys(self._event_dict) + def prev_event_ids(self): + """Returns the list of prev event IDs. The order matches the order + specified in the event, though there is no meaning to it. + + Returns: + list[str]: The list of event IDs of this event's prev_events + """ + return [e for e, _ in self.prev_events] + + def auth_event_ids(self): + """Returns the list of auth event IDs. The order matches the order + specified in the event, though there is no meaning to it. 
+ + Returns: + list[str]: The list of event IDs of this event's auth_events + """ + return [e for e, _ in self.auth_events] + class FrozenEvent(EventBase): + format_version = EventFormatVersions.V1 # All events of this type are V1 + def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None): event_dict = dict(event_dict) @@ -195,22 +239,136 @@ class FrozenEvent(EventBase): rejected_reason=rejected_reason, ) - @staticmethod - def from_event(event): - e = FrozenEvent( - event.get_pdu_json() + def __str__(self): + return self.__repr__() + + def __repr__(self): + return "<FrozenEvent event_id='%s', type='%s', state_key='%s'>" % ( + self.get("event_id", None), + self.get("type", None), + self.get("state_key", None), + ) + + +class FrozenEventV2(EventBase): + format_version = EventFormatVersions.V2 # All events of this type are V2 + + def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None): + event_dict = dict(event_dict) + + # Signatures is a dict of dicts, and this is faster than doing a + # copy.deepcopy + signatures = { + name: {sig_id: sig for sig_id, sig in sigs.items()} + for name, sigs in event_dict.pop("signatures", {}).items() + } + + assert "event_id" not in event_dict + + unsigned = dict(event_dict.pop("unsigned", {})) + + # We intern these strings because they turn up a lot (especially when + # caching). 
+ event_dict = intern_dict(event_dict) + + if USE_FROZEN_DICTS: + frozen_dict = freeze(event_dict) + else: + frozen_dict = event_dict + + self._event_id = None + self.type = event_dict["type"] + if "state_key" in event_dict: + self.state_key = event_dict["state_key"] + + super(FrozenEventV2, self).__init__( + frozen_dict, + signatures=signatures, + unsigned=unsigned, + internal_metadata_dict=internal_metadata_dict, + rejected_reason=rejected_reason, ) - e.internal_metadata = event.internal_metadata + @property + def event_id(self): + # We have to import this here as otherwise we get an import loop which + # is hard to break. + from synapse.crypto.event_signing import compute_event_reference_hash + + if self._event_id: + return self._event_id + self._event_id = "$" + encode_base64(compute_event_reference_hash(self)[1]) + return self._event_id + + def prev_event_ids(self): + """Returns the list of prev event IDs. The order matches the order + specified in the event, though there is no meaning to it. + + Returns: + list[str]: The list of event IDs of this event's prev_events + """ + return self.prev_events + + def auth_event_ids(self): + """Returns the list of auth event IDs. The order matches the order + specified in the event, though there is no meaning to it. 
- return e + Returns: + list[str]: The list of event IDs of this event's auth_events + """ + return self.auth_events def __str__(self): return self.__repr__() def __repr__(self): - return "<FrozenEvent event_id='%s', type='%s', state_key='%s'>" % ( - self.get("event_id", None), + return "<FrozenEventV2 event_id='%s', type='%s', state_key='%s'>" % ( + self.event_id, self.get("type", None), self.get("state_key", None), ) + + +def room_version_to_event_format(room_version): + """Converts a room version string to the event format + + Args: + room_version (str) + + Returns: + int + """ + if room_version not in KNOWN_ROOM_VERSIONS: + # We should have already checked version, so this should not happen + raise RuntimeError("Unrecognized room version %s" % (room_version,)) + + if room_version in ( + RoomVersions.V1, RoomVersions.V2, RoomVersions.STATE_V2_TEST, + ): + return EventFormatVersions.V1 + elif room_version in (RoomVersions.V3,): + return EventFormatVersions.V2 + else: + raise RuntimeError("Unrecognized room version %s" % (room_version,)) + + +def event_type_from_format_version(format_version): + """Returns the python type to use to construct an Event object for the + given event format version. + + Args: + format_version (int): The event format version + + Returns: + type: A type that can be initialized as per the initializer of + `FrozenEvent` + """ + + if format_version == EventFormatVersions.V1: + return FrozenEvent + elif format_version == EventFormatVersions.V2: + return FrozenEventV2 + else: + raise Exception( + "No event format %r" % (format_version,) + ) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index e662eaef10..06e01be918 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -13,63 +13,270 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import copy +import attr +from twisted.internet import defer + +from synapse.api.constants import ( + KNOWN_EVENT_FORMAT_VERSIONS, + KNOWN_ROOM_VERSIONS, + MAX_DEPTH, + EventFormatVersions, +) +from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.types import EventID from synapse.util.stringutils import random_string -from . import EventBase, FrozenEvent, _event_dict_property +from . import ( + _EventInternalMetadata, + event_type_from_format_version, + room_version_to_event_format, +) + + +@attr.s(slots=True, cmp=False, frozen=True) +class EventBuilder(object): + """A format independent event builder used to build up the event content + before signing the event. + + (Note that while objects of this class are frozen, the + content/unsigned/internal_metadata fields are still mutable) + + Attributes: + format_version (int): Event format version + room_id (str) + type (str) + sender (str) + content (dict) + unsigned (dict) + internal_metadata (_EventInternalMetadata) + + _state (StateHandler) + _auth (synapse.api.Auth) + _store (DataStore) + _clock (Clock) + _hostname (str): The hostname of the server creating the event + _signing_key: The signing key to use to sign the event as the server + """ + + _state = attr.ib() + _auth = attr.ib() + _store = attr.ib() + _clock = attr.ib() + _hostname = attr.ib() + _signing_key = attr.ib() + + format_version = attr.ib() + + room_id = attr.ib() + type = attr.ib() + sender = attr.ib() + + content = attr.ib(default=attr.Factory(dict)) + unsigned = attr.ib(default=attr.Factory(dict)) + + # These only exist on a subset of events, so they raise AttributeError if + # someone tries to get them when they don't exist. 
+ _state_key = attr.ib(default=None) + _redacts = attr.ib(default=None) + internal_metadata = attr.ib(default=attr.Factory(lambda: _EventInternalMetadata({}))) -class EventBuilder(EventBase): - def __init__(self, key_values={}, internal_metadata_dict={}): - signatures = copy.deepcopy(key_values.pop("signatures", {})) - unsigned = copy.deepcopy(key_values.pop("unsigned", {})) + @property + def state_key(self): + if self._state_key is not None: + return self._state_key - super(EventBuilder, self).__init__( - key_values, - signatures=signatures, - unsigned=unsigned, - internal_metadata_dict=internal_metadata_dict, + raise AttributeError("state_key") + + def is_state(self): + return self._state_key is not None + + @defer.inlineCallbacks + def build(self, prev_event_ids): + """Transform into a fully signed and hashed event + + Args: + prev_event_ids (list[str]): The event IDs to use as the prev events + + Returns: + Deferred[FrozenEvent] + """ + + state_ids = yield self._state.get_current_state_ids( + self.room_id, prev_event_ids, + ) + auth_ids = yield self._auth.compute_auth_events( + self, state_ids, ) - event_id = _event_dict_property("event_id") - state_key = _event_dict_property("state_key") - type = _event_dict_property("type") + if self.format_version == EventFormatVersions.V1: + auth_events = yield self._store.add_event_hashes(auth_ids) + prev_events = yield self._store.add_event_hashes(prev_event_ids) + else: + auth_events = auth_ids + prev_events = prev_event_ids + + old_depth = yield self._store.get_max_depth_of( + prev_event_ids, + ) + depth = old_depth + 1 + + # we cap depth of generated events, to ensure that they are not + # rejected by other servers (and so that they can be persisted in + # the db) + depth = min(depth, MAX_DEPTH) + + event_dict = { + "auth_events": auth_events, + "prev_events": prev_events, + "type": self.type, + "room_id": self.room_id, + "sender": self.sender, + "content": self.content, + "unsigned": self.unsigned, + "depth": depth, + 
"prev_state": [], + } + + if self.is_state(): + event_dict["state_key"] = self._state_key - def build(self): - return FrozenEvent.from_event(self) + if self._redacts is not None: + event_dict["redacts"] = self._redacts + + defer.returnValue( + create_local_event_from_event_dict( + clock=self._clock, + hostname=self._hostname, + signing_key=self._signing_key, + format_version=self.format_version, + event_dict=event_dict, + internal_metadata_dict=self.internal_metadata.get_dict(), + ) + ) class EventBuilderFactory(object): - def __init__(self, clock, hostname): - self.clock = clock - self.hostname = hostname + def __init__(self, hs): + self.clock = hs.get_clock() + self.hostname = hs.hostname + self.signing_key = hs.config.signing_key[0] + + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.auth = hs.get_auth() + + def new(self, room_version, key_values): + """Generate an event builder appropriate for the given room version + + Args: + room_version (str): Version of the room that we're creating an + event builder for + key_values (dict): Fields used as the basis of the new event + + Returns: + EventBuilder + """ + + # There's currently only the one event version defined + if room_version not in KNOWN_ROOM_VERSIONS: + raise Exception( + "No event format defined for version %r" % (room_version,) + ) + + return EventBuilder( + store=self.store, + state=self.state, + auth=self.auth, + clock=self.clock, + hostname=self.hostname, + signing_key=self.signing_key, + format_version=room_version_to_event_format(room_version), + type=key_values["type"], + state_key=key_values.get("state_key"), + room_id=key_values["room_id"], + sender=key_values["sender"], + content=key_values.get("content", {}), + unsigned=key_values.get("unsigned", {}), + redacts=key_values.get("redacts", None), + ) + + +def create_local_event_from_event_dict(clock, hostname, signing_key, + format_version, event_dict, + internal_metadata_dict=None): + """Takes a fully formed event 
dict, ensuring that fields like `origin` + and `origin_server_ts` have correct values for a locally produced event, + then signs and hashes it. + + Args: + clock (Clock) + hostname (str) + signing_key + format_version (int) + event_dict (dict) + internal_metadata_dict (dict|None) + + Returns: + FrozenEvent + """ + + # There's currently only the one event version defined + if format_version not in KNOWN_EVENT_FORMAT_VERSIONS: + raise Exception( + "No event format defined for version %r" % (format_version,) + ) + + if internal_metadata_dict is None: + internal_metadata_dict = {} + + time_now = int(clock.time_msec()) + + if format_version == EventFormatVersions.V1: + event_dict["event_id"] = _create_event_id(clock, hostname) + + event_dict["origin"] = hostname + event_dict["origin_server_ts"] = time_now + + event_dict.setdefault("unsigned", {}) + age = event_dict["unsigned"].pop("age", 0) + event_dict["unsigned"].setdefault("age_ts", time_now - age) + + event_dict.setdefault("signatures", {}) + + add_hashes_and_signatures( + event_dict, + hostname, + signing_key, + ) + return event_type_from_format_version(format_version)( + event_dict, internal_metadata_dict=internal_metadata_dict, + ) - self.event_id_count = 0 - def create_event_id(self): - i = str(self.event_id_count) - self.event_id_count += 1 +# A counter used when generating new event IDs +_event_id_counter = 0 - local_part = str(int(self.clock.time())) + i + random_string(5) - e_id = EventID(local_part, self.hostname) +def _create_event_id(clock, hostname): + """Create a new event ID - return e_id.to_string() + Args: + clock (Clock) + hostname (str): The server name for the event ID - def new(self, key_values={}): - key_values["event_id"] = self.create_event_id() + Returns: + str + """ - time_now = int(self.clock.time_msec()) + global _event_id_counter - key_values.setdefault("origin", self.hostname) - key_values.setdefault("origin_server_ts", time_now) + i = str(_event_id_counter) + _event_id_counter += 1 - 
key_values.setdefault("unsigned", {}) - age = key_values["unsigned"].pop("age", 0) - key_values["unsigned"].setdefault("age_ts", time_now - age) + local_part = str(int(clock.time())) + i + random_string(5) - key_values["signatures"] = {} + e_id = EventID(local_part, hostname) - return EventBuilder(key_values=key_values,) + return e_id.to_string() diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 652941ca0d..07fccdd8f9 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -38,8 +38,31 @@ def prune_event(event): This is used when we "redact" an event. We want to remove all fields that the user has specified, but we do want to keep necessary information like type, state_key etc. + + Args: + event (FrozenEvent) + + Returns: + FrozenEvent + """ + pruned_event_dict = prune_event_dict(event.get_dict()) + + from . import event_type_from_format_version + return event_type_from_format_version(event.format_version)( + pruned_event_dict, event.internal_metadata.get_dict() + ) + + +def prune_event_dict(event_dict): + """Redacts the event_dict in the same way as `prune_event`, except it + operates on dicts rather than event objects + + Args: + event_dict (dict) + + Returns: + dict: A copy of the pruned event dict """ - event_type = event.type allowed_keys = [ "event_id", @@ -59,13 +82,13 @@ def prune_event(event): "membership", ] - event_dict = event.get_dict() + event_type = event_dict["type"] new_content = {} def add_fields(*fields): for field in fields: - if field in event.content: + if field in event_dict["content"]: new_content[field] = event_dict["content"][field] if event_type == EventTypes.Member: @@ -98,17 +121,17 @@ def prune_event(event): allowed_fields["content"] = new_content - allowed_fields["unsigned"] = {} + unsigned = {} + allowed_fields["unsigned"] = unsigned - if "age_ts" in event.unsigned: - allowed_fields["unsigned"]["age_ts"] = event.unsigned["age_ts"] - if "replaces_state" in event.unsigned: - 
allowed_fields["unsigned"]["replaces_state"] = event.unsigned["replaces_state"] + event_unsigned = event_dict.get("unsigned", {}) - return type(event)( - allowed_fields, - internal_metadata_dict=event.internal_metadata.get_dict() - ) + if "age_ts" in event_unsigned: + unsigned["age_ts"] = event_unsigned["age_ts"] + if "replaces_state" in event_unsigned: + unsigned["replaces_state"] = event_unsigned["replaces_state"] + + return allowed_fields def _copy_field(src, dst, field): @@ -244,6 +267,7 @@ def serialize_event(e, time_now_ms, as_client_event=True, Returns: dict """ + # FIXME(erikj): To handle the case of presence events and the like if not isinstance(e, EventBase): return e @@ -253,6 +277,8 @@ def serialize_event(e, time_now_ms, as_client_event=True, # Should this strip out None's? d = {k: v for k, v in e.get_dict().items()} + d["event_id"] = e.event_id + if "age_ts" in d["unsigned"]: d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"] del d["unsigned"]["age_ts"] diff --git a/synapse/events/validator.py b/synapse/events/validator.py index cf184748a1..a072674b02 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -15,23 +15,29 @@ from six import string_types -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventFormatVersions, EventTypes, Membership from synapse.api.errors import SynapseError from synapse.types import EventID, RoomID, UserID class EventValidator(object): + def validate_new(self, event): + """Validates the event has roughly the right format - def validate(self, event): - EventID.from_string(event.event_id) - RoomID.from_string(event.room_id) + Args: + event (FrozenEvent) + """ + self.validate_builder(event) + + if event.format_version == EventFormatVersions.V1: + EventID.from_string(event.event_id) required = [ - # "auth_events", + "auth_events", "content", - # "hashes", + "hashes", "origin", - # "prev_events", + "prev_events", "sender", "type", ] @@ -41,8 +47,25 @@ 
class EventValidator(object): raise SynapseError(400, "Event does not have key %s" % (k,)) # Check that the following keys have string values - strings = [ + event_strings = [ "origin", + ] + + for s in event_strings: + if not isinstance(getattr(event, s), string_types): + raise SynapseError(400, "'%s' not a string type" % (s,)) + + def validate_builder(self, event): + """Validates that the builder/event has roughly the right format. Only + checks values that we expect a proto event to have, rather than all the + fields an event would have + + Args: + event (EventBuilder|FrozenEvent) + """ + + strings = [ + "room_id", "sender", "type", ] @@ -54,22 +77,7 @@ class EventValidator(object): if not isinstance(getattr(event, s), string_types): raise SynapseError(400, "Not '%s' a string type" % (s,)) - if event.type == EventTypes.Member: - if "membership" not in event.content: - raise SynapseError(400, "Content has not membership key") - - if event.content["membership"] not in Membership.LIST: - raise SynapseError(400, "Invalid membership key") - - # Check that the following keys have dictionary values - # TODO - - # Check that the following keys have the correct format for DAGs - # TODO - - def validate_new(self, event): - self.validate(event) - + RoomID.from_string(event.room_id) UserID.from_string(event.sender) if event.type == EventTypes.Message: @@ -86,9 +94,16 @@ class EventValidator(object): elif event.type == EventTypes.Name: self._ensure_strings(event.content, ["name"]) + elif event.type == EventTypes.Member: + if "membership" not in event.content: + raise SynapseError(400, "Content has not membership key") + + if event.content["membership"] not in Membership.LIST: + raise SynapseError(400, "Invalid membership key") + def _ensure_strings(self, d, keys): for s in keys: if s not in d: raise SynapseError(400, "'%s' not in content" % (s,)) if not isinstance(d[s], string_types): - raise SynapseError(400, "Not '%s' a string type" % (s,)) + raise SynapseError(400, "'%s' 
not a string type" % (s,)) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index b7ad729c63..a7a2ec4523 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -20,10 +20,10 @@ import six from twisted.internet import defer from twisted.internet.defer import DeferredList -from synapse.api.constants import MAX_DEPTH, EventTypes, Membership +from synapse.api.constants import MAX_DEPTH, EventTypes, Membership, RoomVersions from synapse.api.errors import Codes, SynapseError from synapse.crypto.event_signing import check_event_content_hash -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.utils import prune_event from synapse.http.servlet import assert_params_in_dict from synapse.types import get_domain_from_id @@ -43,8 +43,8 @@ class FederationBase(object): self._clock = hs.get_clock() @defer.inlineCallbacks - def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False, - include_none=False): + def _check_sigs_and_hash_and_fetch(self, origin, pdus, room_version, + outlier=False, include_none=False): """Takes a list of PDUs and checks the signatures and hashs of each one. If a PDU fails its signature check then we check if we have it in the database and if not then request if from the originating server of @@ -56,13 +56,17 @@ class FederationBase(object): a new list. Args: + origin (str) pdu (list) - outlier (bool) + room_version (str) + outlier (bool): Whether the events are outliers or not + include_none (str): Whether to include None in the returned list + for events that have failed their checks Returns: Deferred : A list of PDUs that have valid signatures and hashes. 
""" - deferreds = self._check_sigs_and_hashes(pdus) + deferreds = self._check_sigs_and_hashes(room_version, pdus) @defer.inlineCallbacks def handle_check_result(pdu, deferred): @@ -84,6 +88,7 @@ class FederationBase(object): res = yield self.get_pdu( destinations=[pdu.origin], event_id=pdu.event_id, + room_version=room_version, outlier=outlier, timeout=10000, ) @@ -116,16 +121,17 @@ class FederationBase(object): else: defer.returnValue([p for p in valid_pdus if p]) - def _check_sigs_and_hash(self, pdu): + def _check_sigs_and_hash(self, room_version, pdu): return logcontext.make_deferred_yieldable( - self._check_sigs_and_hashes([pdu])[0], + self._check_sigs_and_hashes(room_version, [pdu])[0], ) - def _check_sigs_and_hashes(self, pdus): + def _check_sigs_and_hashes(self, room_version, pdus): """Checks that each of the received events is correctly signed by the sending server. Args: + room_version (str): The room version of the PDUs pdus (list[FrozenEvent]): the events to be checked Returns: @@ -136,7 +142,7 @@ class FederationBase(object): * throws a SynapseError if the signature check failed. The deferreds run their callbacks in the sentinel logcontext. 
""" - deferreds = _check_sigs_on_pdus(self.keyring, pdus) + deferreds = _check_sigs_on_pdus(self.keyring, room_version, pdus) ctx = logcontext.LoggingContext.current_context() @@ -198,16 +204,17 @@ class FederationBase(object): class PduToCheckSig(namedtuple("PduToCheckSig", [ - "pdu", "redacted_pdu_json", "event_id_domain", "sender_domain", "deferreds", + "pdu", "redacted_pdu_json", "sender_domain", "deferreds", ])): pass -def _check_sigs_on_pdus(keyring, pdus): +def _check_sigs_on_pdus(keyring, room_version, pdus): """Check that the given events are correctly signed Args: keyring (synapse.crypto.Keyring): keyring object to do the checks + room_version (str): the room version of the PDUs pdus (Collection[EventBase]): the events to be checked Returns: @@ -220,9 +227,7 @@ def _check_sigs_on_pdus(keyring, pdus): # we want to check that the event is signed by: # - # (a) the server which created the event_id - # - # (b) the sender's server. + # (a) the sender's server # # - except in the case of invites created from a 3pid invite, which are exempt # from this check, because the sender has to match that of the original 3pid @@ -236,34 +241,26 @@ def _check_sigs_on_pdus(keyring, pdus): # and signatures are *supposed* to be valid whether or not an event has been # redacted. But this isn't the worst of the ways that 3pid invites are broken. # + # (b) for V1 and V2 rooms, the server which created the event_id + # # let's start by getting the domain for each pdu, and flattening the event back # to JSON. 
+ pdus_to_check = [ PduToCheckSig( pdu=p, redacted_pdu_json=prune_event(p).get_pdu_json(), - event_id_domain=get_domain_from_id(p.event_id), sender_domain=get_domain_from_id(p.sender), deferreds=[], ) for p in pdus ] - # first make sure that the event is signed by the event_id's domain - deferreds = keyring.verify_json_objects_for_server([ - (p.event_id_domain, p.redacted_pdu_json) - for p in pdus_to_check - ]) - - for p, d in zip(pdus_to_check, deferreds): - p.deferreds.append(d) - - # now let's look for events where the sender's domain is different to the - # event id's domain (normally only the case for joins/leaves), and add additional - # checks. + # First we check that the sender event is signed by the sender's domain + # (except if its a 3pid invite, in which case it may be sent by any server) pdus_to_check_sender = [ p for p in pdus_to_check - if p.sender_domain != p.event_id_domain and not _is_invite_via_3pid(p.pdu) + if not _is_invite_via_3pid(p.pdu) ] more_deferreds = keyring.verify_json_objects_for_server([ @@ -274,19 +271,43 @@ def _check_sigs_on_pdus(keyring, pdus): for p, d in zip(pdus_to_check_sender, more_deferreds): p.deferreds.append(d) + # now let's look for events where the sender's domain is different to the + # event id's domain (normally only the case for joins/leaves), and add additional + # checks. 
Only do this if the room version has a concept of event ID domain + if room_version in ( + RoomVersions.V1, RoomVersions.V2, RoomVersions.STATE_V2_TEST, + ): + pdus_to_check_event_id = [ + p for p in pdus_to_check + if p.sender_domain != get_domain_from_id(p.pdu.event_id) + ] + + more_deferreds = keyring.verify_json_objects_for_server([ + (get_domain_from_id(p.pdu.event_id), p.redacted_pdu_json) + for p in pdus_to_check_event_id + ]) + + for p, d in zip(pdus_to_check_event_id, more_deferreds): + p.deferreds.append(d) + elif room_version in (RoomVersions.V3,): + pass # No further checks needed, as event IDs are hashes here + else: + raise RuntimeError("Unrecognized room version %s" % (room_version,)) + # replace lists of deferreds with single Deferreds return [_flatten_deferred_list(p.deferreds) for p in pdus_to_check] def _flatten_deferred_list(deferreds): - """Given a list of one or more deferreds, either return the single deferred, or - combine into a DeferredList. + """Given a list of deferreds, either return the single deferred, + combine into a DeferredList, or return an already resolved deferred. 
""" if len(deferreds) > 1: return DeferredList(deferreds, fireOnOneErrback=True, consumeErrors=True) - else: - assert len(deferreds) == 1 + elif len(deferreds) == 1: return deferreds[0] + else: + return defer.succeed(None) def _is_invite_via_3pid(event): @@ -297,11 +318,12 @@ def _is_invite_via_3pid(event): ) -def event_from_pdu_json(pdu_json, outlier=False): +def event_from_pdu_json(pdu_json, event_format_version, outlier=False): """Construct a FrozenEvent from an event json received over federation Args: pdu_json (object): pdu as received over federation + event_format_version (int): The event format version outlier (bool): True to mark this event as an outlier Returns: @@ -313,7 +335,7 @@ def event_from_pdu_json(pdu_json, outlier=False): """ # we could probably enforce a bunch of other fields here (room_id, sender, # origin, etc etc) - assert_params_in_dict(pdu_json, ('event_id', 'type', 'depth')) + assert_params_in_dict(pdu_json, ('type', 'depth')) depth = pdu_json['depth'] if not isinstance(depth, six.integer_types): @@ -325,8 +347,8 @@ def event_from_pdu_json(pdu_json, outlier=False): elif depth > MAX_DEPTH: raise SynapseError(400, "Depth too large", Codes.BAD_JSON) - event = FrozenEvent( - pdu_json + event = event_type_from_format_version(event_format_version)( + pdu_json, ) event.internal_metadata.outlier = outlier diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index d05ed91d64..58e04d81ab 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -25,14 +25,20 @@ from prometheus_client import Counter from twisted.internet import defer -from synapse.api.constants import KNOWN_ROOM_VERSIONS, EventTypes, Membership +from synapse.api.constants import ( + KNOWN_ROOM_VERSIONS, + EventTypes, + Membership, + RoomVersions, +) from synapse.api.errors import ( CodeMessageException, + Codes, FederationDeniedError, HttpResponseException, SynapseError, ) -from synapse.events 
import builder +from synapse.events import builder, room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache @@ -66,6 +72,9 @@ class FederationClient(FederationBase): self.state = hs.get_state_handler() self.transport_layer = hs.get_federation_transport_client() + self.hostname = hs.hostname + self.signing_key = hs.config.signing_key[0] + self._get_pdu_cache = ExpiringCache( cache_name="get_pdu_cache", clock=self._clock, @@ -162,13 +171,13 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function - def backfill(self, dest, context, limit, extremities): + def backfill(self, dest, room_id, limit, extremities): """Requests some more historic PDUs for the given context from the given destination server. Args: dest (str): The remote home server to ask. - context (str): The context to backfill. + room_id (str): The room_id to backfill. limit (int): The maximum number of PDUs to return. extremities (list): List of PDU id and origins of the first pdus we have seen from the context @@ -183,18 +192,21 @@ class FederationClient(FederationBase): return transaction_data = yield self.transport_layer.backfill( - dest, context, extremities, limit) + dest, room_id, extremities, limit) logger.debug("backfill transaction_data=%s", repr(transaction_data)) + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + pdus = [ - event_from_pdu_json(p, outlier=False) + event_from_pdu_json(p, format_ver, outlier=False) for p in transaction_data["pdus"] ] # FIXME: We should handle signature failures more gracefully. 
pdus[:] = yield logcontext.make_deferred_yieldable(defer.gatherResults( - self._check_sigs_and_hashes(pdus), + self._check_sigs_and_hashes(room_version, pdus), consumeErrors=True, ).addErrback(unwrapFirstError)) @@ -202,7 +214,8 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function - def get_pdu(self, destinations, event_id, outlier=False, timeout=None): + def get_pdu(self, destinations, event_id, room_version, outlier=False, + timeout=None): """Requests the PDU with given origin and ID from the remote home servers. @@ -212,6 +225,7 @@ class FederationClient(FederationBase): Args: destinations (list): Which home servers to query event_id (str): event to fetch + room_version (str): version of the room outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if it's from an arbitary point in the context as opposed to part of the current block of PDUs. Defaults to `False` @@ -230,6 +244,8 @@ class FederationClient(FederationBase): pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) + format_ver = room_version_to_event_format(room_version) + signed_pdu = None for destination in destinations: now = self._clock.time_msec() @@ -245,7 +261,7 @@ class FederationClient(FederationBase): logger.debug("transaction_data %r", transaction_data) pdu_list = [ - event_from_pdu_json(p, outlier=outlier) + event_from_pdu_json(p, format_ver, outlier=outlier) for p in transaction_data["pdus"] ] @@ -253,7 +269,7 @@ class FederationClient(FederationBase): pdu = pdu_list[0] # Check signatures are correct. 
- signed_pdu = yield self._check_sigs_and_hash(pdu) + signed_pdu = yield self._check_sigs_and_hash(room_version, pdu) break @@ -339,12 +355,16 @@ class FederationClient(FederationBase): destination, room_id, event_id=event_id, ) + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + pdus = [ - event_from_pdu_json(p, outlier=True) for p in result["pdus"] + event_from_pdu_json(p, format_ver, outlier=True) + for p in result["pdus"] ] auth_chain = [ - event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, format_ver, outlier=True) for p in result.get("auth_chain", []) ] @@ -355,7 +375,8 @@ class FederationClient(FederationBase): signed_pdus = yield self._check_sigs_and_hash_and_fetch( destination, [p for p in pdus if p.event_id not in seen_events], - outlier=True + outlier=True, + room_version=room_version, ) signed_pdus.extend( seen_events[p.event_id] for p in pdus if p.event_id in seen_events @@ -364,7 +385,8 @@ class FederationClient(FederationBase): signed_auth = yield self._check_sigs_and_hash_and_fetch( destination, [p for p in auth_chain if p.event_id not in seen_events], - outlier=True + outlier=True, + room_version=room_version, ) signed_auth.extend( seen_events[p.event_id] for p in auth_chain if p.event_id in seen_events @@ -411,6 +433,8 @@ class FederationClient(FederationBase): random.shuffle(srvs) return srvs + room_version = yield self.store.get_room_version(room_id) + batch_size = 20 missing_events = list(missing_events) for i in range(0, len(missing_events), batch_size): @@ -421,6 +445,7 @@ class FederationClient(FederationBase): self.get_pdu, destinations=random_server_list(), event_id=e_id, + room_version=room_version, ) for e_id in batch ] @@ -445,13 +470,17 @@ class FederationClient(FederationBase): destination, room_id, event_id, ) + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + auth_chain = [ - 
event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, format_ver, outlier=True) for p in res["auth_chain"] ] signed_auth = yield self._check_sigs_and_hash_and_fetch( - destination, auth_chain, outlier=True + destination, auth_chain, + outlier=True, room_version=room_version, ) signed_auth.sort(key=lambda e: e.depth) @@ -522,6 +551,8 @@ class FederationClient(FederationBase): Does so by asking one of the already participating servers to create an event with proper context. + Returns a fully signed and hashed event. + Note that this does not append any events to any graphs. Args: @@ -536,8 +567,10 @@ class FederationClient(FederationBase): params (dict[str, str|Iterable[str]]): Query parameters to include in the request. Return: - Deferred: resolves to a tuple of (origin (str), event (object)) - where origin is the remote homeserver which generated the event. + Deferred[tuple[str, FrozenEvent, int]]: resolves to a tuple of + `(origin, event, event_format)` where origin is the remote + homeserver which generated the event, and event_format is one of + `synapse.api.constants.EventFormatVersions`. Fails with a ``SynapseError`` if the chosen remote server returns a 300/400 code. @@ -557,6 +590,11 @@ class FederationClient(FederationBase): destination, room_id, user_id, membership, params, ) + # Note: If not supplied, the room version may be either v1 or v2, + # however either way the event format version will be v1. 
+ room_version = ret.get("room_version", RoomVersions.V1) + event_format = room_version_to_event_format(room_version) + pdu_dict = ret.get("event", None) if not isinstance(pdu_dict, dict): raise InvalidResponseError("Bad 'event' field in response") @@ -571,17 +609,20 @@ class FederationClient(FederationBase): if "prev_state" not in pdu_dict: pdu_dict["prev_state"] = [] - ev = builder.EventBuilder(pdu_dict) + ev = builder.create_local_event_from_event_dict( + self._clock, self.hostname, self.signing_key, + format_version=event_format, event_dict=pdu_dict, + ) defer.returnValue( - (destination, ev) + (destination, ev, event_format) ) return self._try_destination_list( "make_" + membership, destinations, send_request, ) - def send_join(self, destinations, pdu): + def send_join(self, destinations, pdu, event_format_version): """Sends a join event to one of a list of homeservers. Doing so will cause the remote server to add the event to the graph, @@ -591,6 +632,7 @@ class FederationClient(FederationBase): destinations (str): Candidate homeservers which are probably participating in the room. 
pdu (BaseEvent): event to be sent + event_format_version (int): The event format version Return: Deferred: resolves to a dict with members ``origin`` (a string @@ -636,12 +678,12 @@ class FederationClient(FederationBase): logger.debug("Got content: %s", content) state = [ - event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, event_format_version, outlier=True) for p in content.get("state", []) ] auth_chain = [ - event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, event_format_version, outlier=True) for p in content.get("auth_chain", []) ] @@ -650,9 +692,21 @@ class FederationClient(FederationBase): for p in itertools.chain(state, auth_chain) } + room_version = None + for e in state: + if (e.type, e.state_key) == (EventTypes.Create, ""): + room_version = e.content.get("room_version", RoomVersions.V1) + break + + if room_version is None: + # If the state doesn't have a create event then the room is + # invalid, and it would fail auth checks anyway. + raise SynapseError(400, "No create event in state") + valid_pdus = yield self._check_sigs_and_hash_and_fetch( destination, list(pdus.values()), outlier=True, + room_version=room_version, ) valid_pdus_map = { @@ -690,32 +744,90 @@ class FederationClient(FederationBase): @defer.inlineCallbacks def send_invite(self, destination, room_id, event_id, pdu): - time_now = self._clock.time_msec() - try: - code, content = yield self.transport_layer.send_invite( - destination=destination, - room_id=room_id, - event_id=event_id, - content=pdu.get_pdu_json(time_now), - ) - except HttpResponseException as e: - if e.code == 403: - raise e.to_synapse_error() - raise + room_version = yield self.store.get_room_version(room_id) + + content = yield self._do_send_invite(destination, pdu, room_version) pdu_dict = content["event"] logger.debug("Got response to send_invite: %s", pdu_dict) - pdu = event_from_pdu_json(pdu_dict) + room_version = yield self.store.get_room_version(room_id) + format_ver = 
room_version_to_event_format(room_version) + + pdu = event_from_pdu_json(pdu_dict, format_ver) # Check signatures are correct. - pdu = yield self._check_sigs_and_hash(pdu) + pdu = yield self._check_sigs_and_hash(room_version, pdu) # FIXME: We should handle signature failures more gracefully. defer.returnValue(pdu) + @defer.inlineCallbacks + def _do_send_invite(self, destination, pdu, room_version): + """Actually sends the invite, first trying v2 API and falling back to + v1 API if necessary. + + Args: + destination (str): Target server + pdu (FrozenEvent) + room_version (str) + + Returns: + dict: The event as a dict as returned by the remote server + """ + time_now = self._clock.time_msec() + + try: + content = yield self.transport_layer.send_invite_v2( + destination=destination, + room_id=pdu.room_id, + event_id=pdu.event_id, + content={ + "event": pdu.get_pdu_json(time_now), + "room_version": room_version, + "invite_room_state": pdu.unsigned.get("invite_room_state", []), + }, + ) + defer.returnValue(content) + except HttpResponseException as e: + if e.code in [400, 404]: + err = e.to_synapse_error() + + # If we receive an error response that isn't a generic error, we + # assume that the remote understands the v2 invite API and this + # is a legitimate error. + if err.errcode != Codes.UNKNOWN: + raise err + + # Otherwise, we assume that the remote server doesn't understand + # the v2 invite API. + + if room_version in (RoomVersions.V1, RoomVersions.V2): + pass # We'll fall through + else: + raise SynapseError( + 400, + "User's homeserver does not support this room version", + Codes.UNSUPPORTED_ROOM_VERSION, + ) + elif e.code == 403: + raise e.to_synapse_error() + else: + raise + + # Didn't work, try v1 API. 
+ # Note the v1 API returns a tuple of `(200, content)` + + _, content = yield self.transport_layer.send_invite_v1( + destination=destination, + room_id=pdu.room_id, + event_id=pdu.event_id, + content=pdu.get_pdu_json(time_now), + ) + defer.returnValue(content) + def send_leave(self, destinations, pdu): """Sends a leave event to one of a list of homeservers. @@ -785,13 +897,16 @@ class FederationClient(FederationBase): content=send_content, ) + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + auth_chain = [ - event_from_pdu_json(e) + event_from_pdu_json(e, format_ver) for e in content["auth_chain"] ] signed_auth = yield self._check_sigs_and_hash_and_fetch( - destination, auth_chain, outlier=True + destination, auth_chain, outlier=True, room_version=room_version, ) signed_auth.sort(key=lambda e: e.depth) @@ -833,13 +948,16 @@ class FederationClient(FederationBase): timeout=timeout, ) + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + events = [ - event_from_pdu_json(e) + event_from_pdu_json(e, format_ver) for e in content.get("events", []) ] signed_events = yield self._check_sigs_and_hash_and_fetch( - destination, events, outlier=False + destination, events, outlier=False, room_version=room_version, ) except HttpResponseException as e: if not e.code == 400: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 0f9302a6a8..569eb277a9 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -25,15 +25,17 @@ from twisted.internet import defer from twisted.internet.abstract import isIPAddress from twisted.python import failure -from synapse.api.constants import EventTypes +from synapse.api.constants import KNOWN_ROOM_VERSIONS, EventTypes, Membership from synapse.api.errors import ( AuthError, + Codes, FederationError, 
IncompatibleRoomVersionError, NotFoundError, SynapseError, ) from synapse.crypto.event_signing import compute_event_signature +from synapse.events import room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction @@ -147,6 +149,22 @@ class FederationServer(FederationBase): logger.debug("[%s] Transaction is new", transaction.transaction_id) + # Reject if PDU count > 50 and EDU count > 100 + if (len(transaction.pdus) > 50 + or (hasattr(transaction, "edus") and len(transaction.edus) > 100)): + + logger.info( + "Transaction PDU or EDU count too large. Returning 400", + ) + + response = {} + yield self.transaction_actions.set_response( + origin, + transaction, + 400, response + ) + defer.returnValue((400, response)) + received_pdus_counter.inc(len(transaction.pdus)) origin_host, _ = parse_server_name(origin) @@ -162,8 +180,29 @@ class FederationServer(FederationBase): p["age_ts"] = request_time - int(p["age"]) del p["age"] - event = event_from_pdu_json(p) - room_id = event.room_id + # We try and pull out an event ID so that if later checks fail we + # can log something sensible. We don't mandate an event ID here in + # case future event formats get rid of the key. + possible_event_id = p.get("event_id", "<Unknown>") + + # Now we get the room ID so that we can check that we know the + # version of the room. + room_id = p.get("room_id") + if not room_id: + logger.info( + "Ignoring PDU as does not have a room_id. 
Event ID: %s", + possible_event_id, + ) + continue + + try: + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + except NotFoundError: + logger.info("Ignoring PDU for unknown room_id: %s", room_id) + continue + + event = event_from_pdu_json(p, format_ver) pdus_by_room.setdefault(room_id, []).append(event) pdu_results = {} @@ -201,8 +240,9 @@ class FederationServer(FederationBase): f = failure.Failure() pdu_results[event_id] = {"error": str(e)} logger.error( - "Failed to handle PDU %s: %s", - event_id, f.getTraceback().rstrip(), + "Failed to handle PDU %s", + event_id, + exc_info=(f.type, f.value, f.getTracebackObject()), ) yield concurrently_execute( @@ -300,7 +340,7 @@ class FederationServer(FederationBase): if self.hs.is_mine_id(event.event_id): event.signatures.update( compute_event_signature( - event, + event.get_pdu_json(), self.hs.hostname, self.hs.config.signing_key[0] ) @@ -324,11 +364,6 @@ class FederationServer(FederationBase): defer.returnValue((404, "")) @defer.inlineCallbacks - @log_function - def on_pull_request(self, origin, versions): - raise NotImplementedError("Pull transactions not implemented") - - @defer.inlineCallbacks def on_query_request(self, query_type, args): received_queries_counter.labels(query_type).inc() resp = yield self.registry.on_query(query_type, args) @@ -352,18 +387,30 @@ class FederationServer(FederationBase): }) @defer.inlineCallbacks - def on_invite_request(self, origin, content): - pdu = event_from_pdu_json(content) + def on_invite_request(self, origin, content, room_version): + if room_version not in KNOWN_ROOM_VERSIONS: + raise SynapseError( + 400, + "Homeserver does not support this room version", + Codes.UNSUPPORTED_ROOM_VERSION, + ) + + format_ver = room_version_to_event_format(room_version) + + pdu = event_from_pdu_json(content, format_ver) origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, pdu.room_id) ret_pdu 
= yield self.handler.on_invite_request(origin, pdu) time_now = self._clock.time_msec() - defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)})) + defer.returnValue({"event": ret_pdu.get_pdu_json(time_now)}) @defer.inlineCallbacks - def on_send_join_request(self, origin, content): + def on_send_join_request(self, origin, content, room_id): logger.debug("on_send_join_request: content: %s", content) - pdu = event_from_pdu_json(content) + + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + pdu = event_from_pdu_json(content, format_ver) origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, pdu.room_id) @@ -383,13 +430,22 @@ class FederationServer(FederationBase): origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, room_id) pdu = yield self.handler.on_make_leave_request(room_id, user_id) + + room_version = yield self.store.get_room_version(room_id) + time_now = self._clock.time_msec() - defer.returnValue({"event": pdu.get_pdu_json(time_now)}) + defer.returnValue({ + "event": pdu.get_pdu_json(time_now), + "room_version": room_version, + }) @defer.inlineCallbacks - def on_send_leave_request(self, origin, content): + def on_send_leave_request(self, origin, content, room_id): logger.debug("on_send_leave_request: content: %s", content) - pdu = event_from_pdu_json(content) + + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + pdu = event_from_pdu_json(content, format_ver) origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, pdu.room_id) @@ -435,13 +491,16 @@ class FederationServer(FederationBase): origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, room_id) + room_version = yield self.store.get_room_version(room_id) + format_ver = room_version_to_event_format(room_version) + 
auth_chain = [ - event_from_pdu_json(e) + event_from_pdu_json(e, format_ver) for e in content["auth_chain"] ] signed_auth = yield self._check_sigs_and_hash_and_fetch( - origin, auth_chain, outlier=True + origin, auth_chain, outlier=True, room_version=room_version, ) ret = yield self.handler.on_query_auth( @@ -586,16 +645,19 @@ class FederationServer(FederationBase): """ # check that it's actually being sent from a valid destination to # workaround bug #1753 in 0.18.5 and 0.18.6 - if origin != get_domain_from_id(pdu.event_id): + if origin != get_domain_from_id(pdu.sender): # We continue to accept join events from any server; this is # necessary for the federation join dance to work correctly. # (When we join over federation, the "helper" server is # responsible for sending out the join event, rather than the - # origin. See bug #1893). + # origin. See bug #1893. This is also true for some third party + # invites). if not ( pdu.type == 'm.room.member' and pdu.content and - pdu.content.get("membership", None) == 'join' + pdu.content.get("membership", None) in ( + Membership.JOIN, Membership.INVITE, + ) ): logger.info( "Discarding PDU %s from invalid origin %s", @@ -608,9 +670,12 @@ class FederationServer(FederationBase): pdu.event_id, origin ) + # We've already checked that we know the room version by this point + room_version = yield self.store.get_room_version(pdu.room_id) + # Check signature. 
try: - pdu = yield self._check_sigs_and_hash(pdu) + pdu = yield self._check_sigs_and_hash(room_version, pdu) except SynapseError as e: raise FederationError( "ERROR", diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 3fdd63be95..30941f5ad6 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -22,14 +22,17 @@ from prometheus_client import Counter from twisted.internet import defer import synapse.metrics -from synapse.api.errors import FederationDeniedError, HttpResponseException +from synapse.api.errors import ( + FederationDeniedError, + HttpResponseException, + RequestSendFailed, +) from synapse.handlers.presence import format_user_presence_state, get_interested_remotes from synapse.metrics import ( LaterGauge, event_processing_loop_counter, event_processing_loop_room_count, events_processed_counter, - sent_edus_counter, sent_transactions_counter, ) from synapse.metrics.background_process_metrics import run_as_background_process @@ -43,10 +46,24 @@ from .units import Edu, Transaction logger = logging.getLogger(__name__) sent_pdus_destination_dist_count = Counter( - "synapse_federation_client_sent_pdu_destinations:count", "" + "synapse_federation_client_sent_pdu_destinations:count", + "Number of PDUs queued for sending to one or more destinations", ) + sent_pdus_destination_dist_total = Counter( "synapse_federation_client_sent_pdu_destinations:total", "" + "Total number of PDUs queued for sending across all destinations", +) + +sent_edus_counter = Counter( + "synapse_federation_client_sent_edus", + "Total number of EDUs successfully sent", +) + +sent_edus_by_type = Counter( + "synapse_federation_client_sent_edus_by_type", + "Number of sent EDUs successfully sent, by event type", + ["type"], ) @@ -171,7 +188,7 @@ class TransactionQueue(object): def handle_event(event): # Only send events for this server. 
send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() - is_mine = self.is_mine_id(event.event_id) + is_mine = self.is_mine_id(event.sender) if not is_mine and send_on_behalf_of is None: return @@ -183,9 +200,7 @@ class TransactionQueue(object): # banned then it won't receive the event because it won't # be in the room after the ban. destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=[ - prev_id for prev_id, _ in event.prev_events - ], + event.room_id, latest_event_ids=event.prev_event_ids(), ) except Exception: logger.exception( @@ -358,8 +373,6 @@ class TransactionQueue(object): logger.info("Not sending EDU to ourselves") return - sent_edus_counter.inc() - if key: self.pending_edus_keyed_by_dest.setdefault( destination, {} @@ -494,6 +507,9 @@ class TransactionQueue(object): ) if success: sent_transactions_counter.inc() + sent_edus_counter.inc(len(pending_edus)) + for edu in pending_edus: + sent_edus_by_type.labels(edu.edu_type).inc() # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: @@ -520,11 +536,21 @@ class TransactionQueue(object): ) except FederationDeniedError as e: logger.info(e) - except Exception as e: - logger.warn( - "TX [%s] Failed to send transaction: %s", + except HttpResponseException as e: + logger.warning( + "TX [%s] Received %d response to transaction: %s", + destination, e.code, e, + ) + except RequestSendFailed as e: + logger.warning("TX [%s] Failed to send transaction: %s", destination, e) + + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + destination) + except Exception: + logger.exception( + "TX [%s] Failed to send transaction", destination, - e, ) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index edba5a9808..8e2be218e2 100644 --- 
a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -21,7 +21,7 @@ from six.moves import urllib from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.urls import FEDERATION_PREFIX as PREFIX +from synapse.api.urls import FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX from synapse.util.logutils import log_function logger = logging.getLogger(__name__) @@ -51,7 +51,7 @@ class TransportLayerClient(object): logger.debug("get_room_state dest=%s, room=%s", destination, room_id) - path = _create_path(PREFIX, "/state/%s/", room_id) + path = _create_v1_path("/state/%s/", room_id) return self.client.get_json( destination, path=path, args={"event_id": event_id}, ) @@ -73,7 +73,7 @@ class TransportLayerClient(object): logger.debug("get_room_state_ids dest=%s, room=%s", destination, room_id) - path = _create_path(PREFIX, "/state_ids/%s/", room_id) + path = _create_v1_path("/state_ids/%s/", room_id) return self.client.get_json( destination, path=path, args={"event_id": event_id}, ) @@ -95,7 +95,7 @@ class TransportLayerClient(object): logger.debug("get_pdu dest=%s, event_id=%s", destination, event_id) - path = _create_path(PREFIX, "/event/%s/", event_id) + path = _create_v1_path("/event/%s/", event_id) return self.client.get_json(destination, path=path, timeout=timeout) @log_function @@ -121,7 +121,7 @@ class TransportLayerClient(object): # TODO: raise? return - path = _create_path(PREFIX, "/backfill/%s/", room_id) + path = _create_v1_path("/backfill/%s/", room_id) args = { "v": event_tuples, @@ -167,7 +167,7 @@ class TransportLayerClient(object): # generated by the json_data_callback. 
json_data = transaction.get_dict() - path = _create_path(PREFIX, "/send/%s/", transaction.transaction_id) + path = _create_v1_path("/send/%s/", transaction.transaction_id) response = yield self.client.put_json( transaction.destination, @@ -184,7 +184,7 @@ class TransportLayerClient(object): @log_function def make_query(self, destination, query_type, args, retry_on_dns_fail, ignore_backoff=False): - path = _create_path(PREFIX, "/query/%s", query_type) + path = _create_v1_path("/query/%s", query_type) content = yield self.client.get_json( destination=destination, @@ -231,7 +231,7 @@ class TransportLayerClient(object): "make_membership_event called with membership='%s', must be one of %s" % (membership, ",".join(valid_memberships)) ) - path = _create_path(PREFIX, "/make_%s/%s/%s", membership, room_id, user_id) + path = _create_v1_path("/make_%s/%s/%s", membership, room_id, user_id) ignore_backoff = False retry_on_dns_fail = False @@ -258,7 +258,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def send_join(self, destination, room_id, event_id, content): - path = _create_path(PREFIX, "/send_join/%s/%s", room_id, event_id) + path = _create_v1_path("/send_join/%s/%s", room_id, event_id) response = yield self.client.put_json( destination=destination, @@ -271,7 +271,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def send_leave(self, destination, room_id, event_id, content): - path = _create_path(PREFIX, "/send_leave/%s/%s", room_id, event_id) + path = _create_v1_path("/send_leave/%s/%s", room_id, event_id) response = yield self.client.put_json( destination=destination, @@ -289,8 +289,22 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function - def send_invite(self, destination, room_id, event_id, content): - path = _create_path(PREFIX, "/invite/%s/%s", room_id, event_id) + def send_invite_v1(self, destination, room_id, event_id, content): + path = _create_v1_path("/invite/%s/%s", room_id, 
event_id) + + response = yield self.client.put_json( + destination=destination, + path=path, + data=content, + ignore_backoff=True, + ) + + defer.returnValue(response) + + @defer.inlineCallbacks + @log_function + def send_invite_v2(self, destination, room_id, event_id, content): + path = _create_v2_path("/invite/%s/%s", room_id, event_id) response = yield self.client.put_json( destination=destination, @@ -306,7 +320,7 @@ class TransportLayerClient(object): def get_public_rooms(self, remote_server, limit, since_token, search_filter=None, include_all_networks=False, third_party_instance_id=None): - path = PREFIX + "/publicRooms" + path = _create_v1_path("/publicRooms") args = { "include_all_networks": "true" if include_all_networks else "false", @@ -332,7 +346,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def exchange_third_party_invite(self, destination, room_id, event_dict): - path = _create_path(PREFIX, "/exchange_third_party_invite/%s", room_id,) + path = _create_v1_path("/exchange_third_party_invite/%s", room_id,) response = yield self.client.put_json( destination=destination, @@ -345,7 +359,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def get_event_auth(self, destination, room_id, event_id): - path = _create_path(PREFIX, "/event_auth/%s/%s", room_id, event_id) + path = _create_v1_path("/event_auth/%s/%s", room_id, event_id) content = yield self.client.get_json( destination=destination, @@ -357,7 +371,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def send_query_auth(self, destination, room_id, event_id, content): - path = _create_path(PREFIX, "/query_auth/%s/%s", room_id, event_id) + path = _create_v1_path("/query_auth/%s/%s", room_id, event_id) content = yield self.client.post_json( destination=destination, @@ -392,7 +406,7 @@ class TransportLayerClient(object): Returns: A dict containg the device keys. 
""" - path = PREFIX + "/user/keys/query" + path = _create_v1_path("/user/keys/query") content = yield self.client.post_json( destination=destination, @@ -419,7 +433,7 @@ class TransportLayerClient(object): Returns: A dict containg the device keys. """ - path = _create_path(PREFIX, "/user/devices/%s", user_id) + path = _create_v1_path("/user/devices/%s", user_id) content = yield self.client.get_json( destination=destination, @@ -455,7 +469,7 @@ class TransportLayerClient(object): A dict containg the one-time keys. """ - path = PREFIX + "/user/keys/claim" + path = _create_v1_path("/user/keys/claim") content = yield self.client.post_json( destination=destination, @@ -469,7 +483,7 @@ class TransportLayerClient(object): @log_function def get_missing_events(self, destination, room_id, earliest_events, latest_events, limit, min_depth, timeout): - path = _create_path(PREFIX, "/get_missing_events/%s", room_id,) + path = _create_v1_path("/get_missing_events/%s", room_id,) content = yield self.client.post_json( destination=destination, @@ -489,7 +503,7 @@ class TransportLayerClient(object): def get_group_profile(self, destination, group_id, requester_user_id): """Get a group profile """ - path = _create_path(PREFIX, "/groups/%s/profile", group_id,) + path = _create_v1_path("/groups/%s/profile", group_id,) return self.client.get_json( destination=destination, @@ -508,7 +522,7 @@ class TransportLayerClient(object): requester_user_id (str) content (dict): The new profile of the group """ - path = _create_path(PREFIX, "/groups/%s/profile", group_id,) + path = _create_v1_path("/groups/%s/profile", group_id,) return self.client.post_json( destination=destination, @@ -522,7 +536,7 @@ class TransportLayerClient(object): def get_group_summary(self, destination, group_id, requester_user_id): """Get a group summary """ - path = _create_path(PREFIX, "/groups/%s/summary", group_id,) + path = _create_v1_path("/groups/%s/summary", group_id,) return self.client.get_json( 
destination=destination, @@ -535,7 +549,7 @@ class TransportLayerClient(object): def get_rooms_in_group(self, destination, group_id, requester_user_id): """Get all rooms in a group """ - path = _create_path(PREFIX, "/groups/%s/rooms", group_id,) + path = _create_v1_path("/groups/%s/rooms", group_id,) return self.client.get_json( destination=destination, @@ -548,7 +562,7 @@ class TransportLayerClient(object): content): """Add a room to a group """ - path = _create_path(PREFIX, "/groups/%s/room/%s", group_id, room_id,) + path = _create_v1_path("/groups/%s/room/%s", group_id, room_id,) return self.client.post_json( destination=destination, @@ -562,8 +576,8 @@ class TransportLayerClient(object): config_key, content): """Update room in group """ - path = _create_path( - PREFIX, "/groups/%s/room/%s/config/%s", + path = _create_v1_path( + "/groups/%s/room/%s/config/%s", group_id, room_id, config_key, ) @@ -578,7 +592,7 @@ class TransportLayerClient(object): def remove_room_from_group(self, destination, group_id, requester_user_id, room_id): """Remove a room from a group """ - path = _create_path(PREFIX, "/groups/%s/room/%s", group_id, room_id,) + path = _create_v1_path("/groups/%s/room/%s", group_id, room_id,) return self.client.delete_json( destination=destination, @@ -591,7 +605,7 @@ class TransportLayerClient(object): def get_users_in_group(self, destination, group_id, requester_user_id): """Get users in a group """ - path = _create_path(PREFIX, "/groups/%s/users", group_id,) + path = _create_v1_path("/groups/%s/users", group_id,) return self.client.get_json( destination=destination, @@ -604,7 +618,7 @@ class TransportLayerClient(object): def get_invited_users_in_group(self, destination, group_id, requester_user_id): """Get users that have been invited to a group """ - path = _create_path(PREFIX, "/groups/%s/invited_users", group_id,) + path = _create_v1_path("/groups/%s/invited_users", group_id,) return self.client.get_json( destination=destination, @@ -617,8 +631,8 
@@ class TransportLayerClient(object): def accept_group_invite(self, destination, group_id, user_id, content): """Accept a group invite """ - path = _create_path( - PREFIX, "/groups/%s/users/%s/accept_invite", + path = _create_v1_path( + "/groups/%s/users/%s/accept_invite", group_id, user_id, ) @@ -633,7 +647,7 @@ class TransportLayerClient(object): def join_group(self, destination, group_id, user_id, content): """Attempts to join a group """ - path = _create_path(PREFIX, "/groups/%s/users/%s/join", group_id, user_id) + path = _create_v1_path("/groups/%s/users/%s/join", group_id, user_id) return self.client.post_json( destination=destination, @@ -646,7 +660,7 @@ class TransportLayerClient(object): def invite_to_group(self, destination, group_id, user_id, requester_user_id, content): """Invite a user to a group """ - path = _create_path(PREFIX, "/groups/%s/users/%s/invite", group_id, user_id) + path = _create_v1_path("/groups/%s/users/%s/invite", group_id, user_id) return self.client.post_json( destination=destination, @@ -662,7 +676,7 @@ class TransportLayerClient(object): invited. """ - path = _create_path(PREFIX, "/groups/local/%s/users/%s/invite", group_id, user_id) + path = _create_v1_path("/groups/local/%s/users/%s/invite", group_id, user_id) return self.client.post_json( destination=destination, @@ -676,7 +690,7 @@ class TransportLayerClient(object): user_id, content): """Remove a user fron a group """ - path = _create_path(PREFIX, "/groups/%s/users/%s/remove", group_id, user_id) + path = _create_v1_path("/groups/%s/users/%s/remove", group_id, user_id) return self.client.post_json( destination=destination, @@ -693,7 +707,7 @@ class TransportLayerClient(object): kicked from the group. 
""" - path = _create_path(PREFIX, "/groups/local/%s/users/%s/remove", group_id, user_id) + path = _create_v1_path("/groups/local/%s/users/%s/remove", group_id, user_id) return self.client.post_json( destination=destination, @@ -708,7 +722,7 @@ class TransportLayerClient(object): the attestations """ - path = _create_path(PREFIX, "/groups/%s/renew_attestation/%s", group_id, user_id) + path = _create_v1_path("/groups/%s/renew_attestation/%s", group_id, user_id) return self.client.post_json( destination=destination, @@ -723,12 +737,12 @@ class TransportLayerClient(object): """Update a room entry in a group summary """ if category_id: - path = _create_path( - PREFIX, "/groups/%s/summary/categories/%s/rooms/%s", + path = _create_v1_path( + "/groups/%s/summary/categories/%s/rooms/%s", group_id, category_id, room_id, ) else: - path = _create_path(PREFIX, "/groups/%s/summary/rooms/%s", group_id, room_id,) + path = _create_v1_path("/groups/%s/summary/rooms/%s", group_id, room_id,) return self.client.post_json( destination=destination, @@ -744,12 +758,12 @@ class TransportLayerClient(object): """Delete a room entry in a group summary """ if category_id: - path = _create_path( - PREFIX + "/groups/%s/summary/categories/%s/rooms/%s", + path = _create_v1_path( + "/groups/%s/summary/categories/%s/rooms/%s", group_id, category_id, room_id, ) else: - path = _create_path(PREFIX, "/groups/%s/summary/rooms/%s", group_id, room_id,) + path = _create_v1_path("/groups/%s/summary/rooms/%s", group_id, room_id,) return self.client.delete_json( destination=destination, @@ -762,7 +776,7 @@ class TransportLayerClient(object): def get_group_categories(self, destination, group_id, requester_user_id): """Get all categories in a group """ - path = _create_path(PREFIX, "/groups/%s/categories", group_id,) + path = _create_v1_path("/groups/%s/categories", group_id,) return self.client.get_json( destination=destination, @@ -775,7 +789,7 @@ class TransportLayerClient(object): def 
get_group_category(self, destination, group_id, requester_user_id, category_id): """Get category info in a group """ - path = _create_path(PREFIX, "/groups/%s/categories/%s", group_id, category_id,) + path = _create_v1_path("/groups/%s/categories/%s", group_id, category_id,) return self.client.get_json( destination=destination, @@ -789,7 +803,7 @@ class TransportLayerClient(object): content): """Update a category in a group """ - path = _create_path(PREFIX, "/groups/%s/categories/%s", group_id, category_id,) + path = _create_v1_path("/groups/%s/categories/%s", group_id, category_id,) return self.client.post_json( destination=destination, @@ -804,7 +818,7 @@ class TransportLayerClient(object): category_id): """Delete a category in a group """ - path = _create_path(PREFIX, "/groups/%s/categories/%s", group_id, category_id,) + path = _create_v1_path("/groups/%s/categories/%s", group_id, category_id,) return self.client.delete_json( destination=destination, @@ -817,7 +831,7 @@ class TransportLayerClient(object): def get_group_roles(self, destination, group_id, requester_user_id): """Get all roles in a group """ - path = _create_path(PREFIX, "/groups/%s/roles", group_id,) + path = _create_v1_path("/groups/%s/roles", group_id,) return self.client.get_json( destination=destination, @@ -830,7 +844,7 @@ class TransportLayerClient(object): def get_group_role(self, destination, group_id, requester_user_id, role_id): """Get a roles info """ - path = _create_path(PREFIX, "/groups/%s/roles/%s", group_id, role_id,) + path = _create_v1_path("/groups/%s/roles/%s", group_id, role_id,) return self.client.get_json( destination=destination, @@ -844,7 +858,7 @@ class TransportLayerClient(object): content): """Update a role in a group """ - path = _create_path(PREFIX, "/groups/%s/roles/%s", group_id, role_id,) + path = _create_v1_path("/groups/%s/roles/%s", group_id, role_id,) return self.client.post_json( destination=destination, @@ -858,7 +872,7 @@ class TransportLayerClient(object): 
def delete_group_role(self, destination, group_id, requester_user_id, role_id): """Delete a role in a group """ - path = _create_path(PREFIX, "/groups/%s/roles/%s", group_id, role_id,) + path = _create_v1_path("/groups/%s/roles/%s", group_id, role_id,) return self.client.delete_json( destination=destination, @@ -873,12 +887,12 @@ class TransportLayerClient(object): """Update a users entry in a group """ if role_id: - path = _create_path( - PREFIX, "/groups/%s/summary/roles/%s/users/%s", + path = _create_v1_path( + "/groups/%s/summary/roles/%s/users/%s", group_id, role_id, user_id, ) else: - path = _create_path(PREFIX, "/groups/%s/summary/users/%s", group_id, user_id,) + path = _create_v1_path("/groups/%s/summary/users/%s", group_id, user_id,) return self.client.post_json( destination=destination, @@ -893,7 +907,7 @@ class TransportLayerClient(object): content): """Sets the join policy for a group """ - path = _create_path(PREFIX, "/groups/%s/settings/m.join_policy", group_id,) + path = _create_v1_path("/groups/%s/settings/m.join_policy", group_id,) return self.client.put_json( destination=destination, @@ -909,12 +923,12 @@ class TransportLayerClient(object): """Delete a users entry in a group """ if role_id: - path = _create_path( - PREFIX, "/groups/%s/summary/roles/%s/users/%s", + path = _create_v1_path( + "/groups/%s/summary/roles/%s/users/%s", group_id, role_id, user_id, ) else: - path = _create_path(PREFIX, "/groups/%s/summary/users/%s", group_id, user_id,) + path = _create_v1_path("/groups/%s/summary/users/%s", group_id, user_id,) return self.client.delete_json( destination=destination, @@ -927,7 +941,7 @@ class TransportLayerClient(object): """Get the groups a list of users are publicising """ - path = PREFIX + "/get_groups_publicised" + path = _create_v1_path("/get_groups_publicised") content = {"user_ids": user_ids} @@ -939,20 +953,43 @@ class TransportLayerClient(object): ) -def _create_path(prefix, path, *args): - """Creates a path from the prefix, path 
template and args. Ensures that - all args are url encoded. +def _create_v1_path(path, *args): + """Creates a path against V1 federation API from the path template and + args. Ensures that all args are url encoded. + + Example: + + _create_v1_path("/event/%s/", event_id) + + Args: + path (str): String template for the path + args: ([str]): Args to insert into path. Each arg will be url encoded + + Returns: + str + """ + return ( + FEDERATION_V1_PREFIX + + path % tuple(urllib.parse.quote(arg, "") for arg in args) + ) + + +def _create_v2_path(path, *args): + """Creates a path against V2 federation API from the path template and + args. Ensures that all args are url encoded. Example: - _create_path(PREFIX, "/event/%s/", event_id) + _create_v2_path("/event/%s/", event_id) Args: - prefix (str) path (str): String template for the path args: ([str]): Args to insert into path. Each arg will be url encoded Returns: str """ - return prefix + path % tuple(urllib.parse.quote(arg, "") for arg in args) + return ( + FEDERATION_V2_PREFIX + + path % tuple(urllib.parse.quote(arg, "") for arg in args) + ) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 6d4a26f595..5ba94be2ec 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -21,8 +21,9 @@ import re from twisted.internet import defer import synapse +from synapse.api.constants import RoomVersions from synapse.api.errors import Codes, FederationDeniedError, SynapseError -from synapse.api.urls import FEDERATION_PREFIX as PREFIX +from synapse.api.urls import FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX from synapse.http.endpoint import parse_and_validate_server_name from synapse.http.server import JsonResource from synapse.http.servlet import ( @@ -42,9 +43,20 @@ logger = logging.getLogger(__name__) class TransportLayerServer(JsonResource): """Handles incoming federation HTTP requests""" - def __init__(self, hs): + def __init__(self, hs, 
servlet_groups=None): + """Initialize the TransportLayerServer + + Will by default register all servlets. For custom behaviour, pass in + a list of servlet_groups to register. + + Args: + hs (synapse.server.HomeServer): homeserver + servlet_groups (list[str], optional): List of servlet groups to register. + Defaults to ``DEFAULT_SERVLET_GROUPS``. + """ self.hs = hs self.clock = hs.get_clock() + self.servlet_groups = servlet_groups super(TransportLayerServer, self).__init__(hs, canonical_json=False) @@ -66,6 +78,7 @@ class TransportLayerServer(JsonResource): resource=self, ratelimiter=self.ratelimiter, authenticator=self.authenticator, + servlet_groups=self.servlet_groups, ) @@ -227,6 +240,8 @@ class BaseFederationServlet(object): """ REQUIRE_AUTH = True + PREFIX = FEDERATION_V1_PREFIX # Allows specifying the API version + def __init__(self, handler, authenticator, ratelimiter, server_name): self.handler = handler self.authenticator = authenticator @@ -286,7 +301,7 @@ class BaseFederationServlet(object): return new_func def register(self, server): - pattern = re.compile("^" + PREFIX + self.PATH + "$") + pattern = re.compile("^" + self.PREFIX + self.PATH + "$") for method in ("GET", "PUT", "POST"): code = getattr(self, "on_%s" % (method), None) @@ -362,14 +377,6 @@ class FederationSendServlet(BaseFederationServlet): defer.returnValue((code, response)) -class FederationPullServlet(BaseFederationServlet): - PATH = "/pull/" - - # This is for when someone asks us for everything since version X - def on_GET(self, origin, content, query): - return self.handler.on_pull_request(query["origin"][0], query["v"]) - - class FederationEventServlet(BaseFederationServlet): PATH = "/event/(?P<event_id>[^/]*)/" @@ -474,7 +481,7 @@ class FederationSendLeaveServlet(BaseFederationServlet): @defer.inlineCallbacks def on_PUT(self, origin, content, query, room_id, event_id): - content = yield self.handler.on_send_leave_request(origin, content) + content = yield 
self.handler.on_send_leave_request(origin, content, room_id) defer.returnValue((200, content)) @@ -492,18 +499,50 @@ class FederationSendJoinServlet(BaseFederationServlet): def on_PUT(self, origin, content, query, context, event_id): # TODO(paul): assert that context/event_id parsed from path actually # match those given in content - content = yield self.handler.on_send_join_request(origin, content) + content = yield self.handler.on_send_join_request(origin, content, context) defer.returnValue((200, content)) -class FederationInviteServlet(BaseFederationServlet): +class FederationV1InviteServlet(BaseFederationServlet): PATH = "/invite/(?P<context>[^/]*)/(?P<event_id>[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, context, event_id): + # We don't get a room version, so we have to assume its EITHER v1 or + # v2. This is "fine" as the only difference between V1 and V2 is the + # state resolution algorithm, and we don't use that for processing + # invites + content = yield self.handler.on_invite_request( + origin, content, room_version=RoomVersions.V1, + ) + + # V1 federation API is defined to return a content of `[200, {...}]` + # due to a historical bug. 
+ defer.returnValue((200, (200, content))) + + +class FederationV2InviteServlet(BaseFederationServlet): + PATH = "/invite/(?P<context>[^/]*)/(?P<event_id>[^/]*)" + + PREFIX = FEDERATION_V2_PREFIX + + @defer.inlineCallbacks + def on_PUT(self, origin, content, query, context, event_id): # TODO(paul): assert that context/event_id parsed from path actually # match those given in content - content = yield self.handler.on_invite_request(origin, content) + + room_version = content["room_version"] + event = content["event"] + invite_room_state = content["invite_room_state"] + + # Synapse expects invite_room_state to be in unsigned, as it is in v1 + # API + + event.setdefault("unsigned", {})["invite_room_state"] = invite_room_state + + content = yield self.handler.on_invite_request( + origin, event, room_version=room_version, + ) defer.returnValue((200, content)) @@ -1262,7 +1301,6 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): FEDERATION_SERVLET_CLASSES = ( FederationSendServlet, - FederationPullServlet, FederationEventServlet, FederationStateServlet, FederationStateIdsServlet, @@ -1273,7 +1311,8 @@ FEDERATION_SERVLET_CLASSES = ( FederationEventServlet, FederationSendJoinServlet, FederationSendLeaveServlet, - FederationInviteServlet, + FederationV1InviteServlet, + FederationV2InviteServlet, FederationQueryAuthServlet, FederationGetMissingEventsServlet, FederationEventAuthServlet, @@ -1282,10 +1321,12 @@ FEDERATION_SERVLET_CLASSES = ( FederationClientKeysClaimServlet, FederationThirdPartyInviteExchangeServlet, On3pidBindServlet, - OpenIdUserInfo, FederationVersionServlet, ) +OPENID_SERVLET_CLASSES = ( + OpenIdUserInfo, +) ROOM_LIST_CLASSES = ( PublicRoomList, @@ -1324,44 +1365,83 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( FederationGroupsRenewAttestaionServlet, ) +DEFAULT_SERVLET_GROUPS = ( + "federation", + "room_list", + "group_server", + "group_local", + "group_attestation", + "openid", +) + -def register_servlets(hs, resource, authenticator, 
ratelimiter): - for servletclass in FEDERATION_SERVLET_CLASSES: - servletclass( - handler=hs.get_federation_server(), - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - for servletclass in ROOM_LIST_CLASSES: - servletclass( - handler=hs.get_room_list_handler(), - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - for servletclass in GROUP_SERVER_SERVLET_CLASSES: - servletclass( - handler=hs.get_groups_server_handler(), - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - for servletclass in GROUP_LOCAL_SERVLET_CLASSES: - servletclass( - handler=hs.get_groups_local_handler(), - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - for servletclass in GROUP_ATTESTATION_SERVLET_CLASSES: - servletclass( - handler=hs.get_groups_attestation_renewer(), - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) +def register_servlets(hs, resource, authenticator, ratelimiter, servlet_groups=None): + """Initialize and register servlet classes. + + Will by default register all servlets. For custom behaviour, pass in + a list of servlet_groups to register. + + Args: + hs (synapse.server.HomeServer): homeserver + resource (TransportLayerServer): resource class to register to + authenticator (Authenticator): authenticator to use + ratelimiter (util.ratelimitutils.FederationRateLimiter): ratelimiter to use + servlet_groups (list[str], optional): List of servlet groups to register. + Defaults to ``DEFAULT_SERVLET_GROUPS``. 
+ """ + if not servlet_groups: + servlet_groups = DEFAULT_SERVLET_GROUPS + + if "federation" in servlet_groups: + for servletclass in FEDERATION_SERVLET_CLASSES: + servletclass( + handler=hs.get_federation_server(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) + + if "openid" in servlet_groups: + for servletclass in OPENID_SERVLET_CLASSES: + servletclass( + handler=hs.get_federation_server(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) + + if "room_list" in servlet_groups: + for servletclass in ROOM_LIST_CLASSES: + servletclass( + handler=hs.get_room_list_handler(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) + + if "group_server" in servlet_groups: + for servletclass in GROUP_SERVER_SERVLET_CLASSES: + servletclass( + handler=hs.get_groups_server_handler(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) + + if "group_local" in servlet_groups: + for servletclass in GROUP_LOCAL_SERVLET_CLASSES: + servletclass( + handler=hs.get_groups_local_handler(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) + + if "group_attestation" in servlet_groups: + for servletclass in GROUP_ATTESTATION_SERVLET_CLASSES: + servletclass( + handler=hs.get_groups_attestation_renewer(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) diff --git a/synapse/federation/units.py b/synapse/federation/units.py index c5ab14314e..025a79c022 100644 --- a/synapse/federation/units.py +++ b/synapse/federation/units.py @@ -117,9 +117,6 @@ class Transaction(JsonEncodedObject): "Require 'transaction_id' to construct a Transaction" ) - for p in pdus: - p.transaction_id = kwargs["transaction_id"] - kwargs["pdus"] = [p.get_pdu_json() for p in 
pdus] return Transaction(**kwargs) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index b04f4234ca..786149be65 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -42,7 +42,7 @@ from signedjson.sign import sign_json from twisted.internet import defer -from synapse.api.errors import SynapseError +from synapse.api.errors import RequestSendFailed, SynapseError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from synapse.util.logcontext import run_in_background @@ -191,6 +191,11 @@ class GroupAttestionRenewer(object): yield self.store.update_attestation_renewal( group_id, user_id, attestation ) + except RequestSendFailed as e: + logger.warning( + "Failed to renew attestation of %r in %r: %s", + user_id, group_id, e, + ) except Exception: logger.exception("Error renewing attestation of %r in %r", user_id, group_id) diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 413425fed1..2dd183018a 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -17,7 +17,6 @@ from .admin import AdminHandler from .directory import DirectoryHandler from .federation import FederationHandler from .identity import IdentityHandler -from .register import RegistrationHandler from .search import SearchHandler @@ -41,7 +40,6 @@ class Handlers(object): """ def __init__(self, hs): - self.registration_handler = RegistrationHandler(hs) self.federation_handler = FederationHandler(hs) self.directory_handler = DirectoryHandler(hs) self.admin_handler = AdminHandler(hs) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 704181d2d3..594754cfd8 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -167,4 +167,4 @@ class BaseHandler(object): ratelimit=False, ) except Exception as e: - logger.warn("Error kicking guest user: %s" % (e,)) + logger.exception("Error kicking guest user: 
%s" % (e,)) diff --git a/synapse/handlers/acme.py b/synapse/handlers/acme.py new file mode 100644 index 0000000000..813777bf18 --- /dev/null +++ b/synapse/handlers/acme.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import attr +from zope.interface import implementer + +import twisted +import twisted.internet.error +from twisted.internet import defer +from twisted.python.filepath import FilePath +from twisted.python.url import URL +from twisted.web import server, static +from twisted.web.resource import Resource + +from synapse.app import check_bind_error + +logger = logging.getLogger(__name__) + +try: + from txacme.interfaces import ICertificateStore + + @attr.s + @implementer(ICertificateStore) + class ErsatzStore(object): + """ + A store that only stores in memory. 
+ """ + + certs = attr.ib(default=attr.Factory(dict)) + + def store(self, server_name, pem_objects): + self.certs[server_name] = [o.as_bytes() for o in pem_objects] + return defer.succeed(None) + + +except ImportError: + # txacme is missing + pass + + +class AcmeHandler(object): + def __init__(self, hs): + self.hs = hs + self.reactor = hs.get_reactor() + self._acme_domain = hs.config.acme_domain + + @defer.inlineCallbacks + def start_listening(self): + + # Configure logging for txacme, if you need to debug + # from eliot import add_destinations + # from eliot.twisted import TwistedDestination + # + # add_destinations(TwistedDestination()) + + from txacme.challenges import HTTP01Responder + from txacme.service import AcmeIssuingService + from txacme.endpoint import load_or_create_client_key + from txacme.client import Client + from josepy.jwa import RS256 + + self._store = ErsatzStore() + responder = HTTP01Responder() + + self._issuer = AcmeIssuingService( + cert_store=self._store, + client_creator=( + lambda: Client.from_url( + reactor=self.reactor, + url=URL.from_text(self.hs.config.acme_url), + key=load_or_create_client_key( + FilePath(self.hs.config.config_dir_path) + ), + alg=RS256, + ) + ), + clock=self.reactor, + responders=[responder], + ) + + well_known = Resource() + well_known.putChild(b'acme-challenge', responder.resource) + responder_resource = Resource() + responder_resource.putChild(b'.well-known', well_known) + responder_resource.putChild(b'check', static.Data(b'OK', b'text/plain')) + + srv = server.Site(responder_resource) + + bind_addresses = self.hs.config.acme_bind_addresses + for host in bind_addresses: + logger.info( + "Listening for ACME requests on %s:%i", host, self.hs.config.acme_port, + ) + try: + self.reactor.listenTCP( + self.hs.config.acme_port, + srv, + interface=host, + ) + except twisted.internet.error.CannotListenError as e: + check_bind_error(e, host, bind_addresses) + + # Make sure we are registered to the ACME server. 
There's no public API + # for this, it is usually triggered by startService, but since we don't + # want it to control where we save the certificates, we have to reach in + # and trigger the registration machinery ourselves. + self._issuer._registered = False + yield self._issuer._ensure_registered() + + @defer.inlineCallbacks + def provision_certificate(self): + + logger.warning("Reprovisioning %s", self._acme_domain) + + try: + yield self._issuer.issue_cert(self._acme_domain) + except Exception: + logger.exception("Fail!") + raise + logger.warning("Reprovisioned %s, saving.", self._acme_domain) + cert_chain = self._store.certs[self._acme_domain] + + try: + with open(self.hs.config.tls_private_key_file, "wb") as private_key_file: + for x in cert_chain: + if x.startswith(b"-----BEGIN RSA PRIVATE KEY-----"): + private_key_file.write(x) + + with open(self.hs.config.tls_certificate_file, "wb") as certificate_file: + for x in cert_chain: + if x.startswith(b"-----BEGIN CERTIFICATE-----"): + certificate_file.write(x) + except Exception: + logger.exception("Failed saving!") + raise + + defer.returnValue(True) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 329e3c7d71..2abd9af94f 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -59,6 +59,7 @@ class AuthHandler(BaseHandler): LoginType.EMAIL_IDENTITY: self._check_email_identity, LoginType.MSISDN: self._check_msisdn, LoginType.DUMMY: self._check_dummy_auth, + LoginType.TERMS: self._check_terms_auth, } self.bcrypt_rounds = hs.config.bcrypt_rounds @@ -431,6 +432,9 @@ class AuthHandler(BaseHandler): def _check_dummy_auth(self, authdict, _): return defer.succeed(True) + def _check_terms_auth(self, authdict, _): + return defer.succeed(True) + @defer.inlineCallbacks def _check_threepid(self, medium, authdict): if 'threepid_creds' not in authdict: @@ -462,6 +466,22 @@ class AuthHandler(BaseHandler): def _get_params_recaptcha(self): return {"public_key": 
self.hs.config.recaptcha_public_key} + def _get_params_terms(self): + return { + "policies": { + "privacy_policy": { + "version": self.hs.config.user_consent_version, + "en": { + "name": self.hs.config.user_consent_policy_name, + "url": "%s_matrix/consent?v=%s" % ( + self.hs.config.public_baseurl, + self.hs.config.user_consent_version, + ), + }, + }, + }, + } + def _auth_dict_for_flows(self, flows, session): public_flows = [] for f in flows: @@ -469,6 +489,7 @@ class AuthHandler(BaseHandler): get_params = { LoginType.RECAPTCHA: self._get_params_recaptcha, + LoginType.TERMS: self._get_params_terms, } params = {} @@ -542,10 +563,10 @@ class AuthHandler(BaseHandler): insensitively, but return None if there are multiple inexact matches. Args: - (str) user_id: complete @user:id + (unicode|bytes) user_id: complete @user:id Returns: - defer.Deferred: (str) canonical_user_id, or None if zero or + defer.Deferred: (unicode) canonical_user_id, or None if zero or multiple matches """ res = yield self._find_user_id_and_pwd_hash(user_id) @@ -933,6 +954,15 @@ class MacaroonGenerator(object): return macaroon.serialize() def generate_short_term_login_token(self, user_id, duration_in_ms=(2 * 60 * 1000)): + """ + + Args: + user_id (unicode): + duration_in_ms (int): + + Returns: + unicode + """ macaroon = self._generate_base_macaroon(user_id) macaroon.add_first_party_caveat("type = login") now = self.hs.get_clock().time_msec() diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9e017116a9..c708c35d4d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -20,7 +20,11 @@ from twisted.internet import defer from synapse.api import errors from synapse.api.constants import EventTypes -from synapse.api.errors import FederationDeniedError +from synapse.api.errors import ( + FederationDeniedError, + HttpResponseException, + RequestSendFailed, +) from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util import stringutils from 
synapse.util.async_helpers import Linearizer @@ -504,13 +508,13 @@ class DeviceListEduUpdater(object): origin = get_domain_from_id(user_id) try: result = yield self.federation.query_user_devices(origin, user_id) - except NotRetryingDestination: + except ( + NotRetryingDestination, RequestSendFailed, HttpResponseException, + ): # TODO: Remember that we are now out of sync and try again # later logger.warn( - "Failed to handle device list update for %s," - " we're not retrying the remote", - user_id, + "Failed to handle device list update for %s", user_id, ) # We abort on exceptions rather than accepting the update # as otherwise synapse will 'forget' that its device list @@ -532,6 +536,25 @@ class DeviceListEduUpdater(object): stream_id = result["stream_id"] devices = result["devices"] + + # If the remote server has more than ~1000 devices for this user + # we assume that something is going horribly wrong (e.g. a bot + # that logs in and creates a new device every time it tries to + # send a message). Maintaining lots of devices per user in the + # cache can cause serious performance issues as if this request + # takes more than 60s to complete, internal replication from the + # inbound federation worker to the synapse master may time out + # causing the inbound federation to fail and causing the remote + # server to retry, causing a DoS. So in this scenario we give + # up on storing the total list of devices and only handle the + # delta instead. 
+ if len(devices) > 1000: + logger.warn( + "Ignoring device list snapshot for %s as it has >1K devs (%d)", + user_id, len(devices) + ) + devices = [] + yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 7d67bf803a..8b113307d2 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -57,8 +57,8 @@ class DirectoryHandler(BaseHandler): # general association creation for both human users and app services for wchar in string.whitespace: - if wchar in room_alias.localpart: - raise SynapseError(400, "Invalid characters in room alias") + if wchar in room_alias.localpart: + raise SynapseError(400, "Invalid characters in room alias") if not self.hs.is_mine(room_alias): raise SynapseError(400, "Room alias must be local") @@ -112,7 +112,9 @@ class DirectoryHandler(BaseHandler): 403, "This user is not permitted to create this alias", ) - if not self.config.is_alias_creation_allowed(user_id, room_alias.to_string()): + if not self.config.is_alias_creation_allowed( + user_id, room_id, room_alias.to_string(), + ): # Lets just return a generic message, as there may be all sorts of # reasons why we said no. TODO: Allow configurable error messages # per alias creation rule? @@ -138,9 +140,30 @@ class DirectoryHandler(BaseHandler): ) @defer.inlineCallbacks - def delete_association(self, requester, room_alias): - # association deletion for human users + def delete_association(self, requester, room_alias, send_event=True): + """Remove an alias from the directory + (this is only meant for human users; AS users should call + delete_appservice_association) + + Args: + requester (Requester): + room_alias (RoomAlias): + send_event (bool): Whether to send an updated m.room.aliases event. 
+ Note that, if we delete the canonical alias, we will always attempt + to send an m.room.canonical_alias event + + Returns: + Deferred[unicode]: room id that the alias used to point to + + Raises: + NotFoundError: if the alias doesn't exist + + AuthError: if the user doesn't have perms to delete the alias (ie, the user + is neither the creator of the alias, nor a server admin. + + SynapseError: if the alias belongs to an AS + """ user_id = requester.user.to_string() try: @@ -168,10 +191,11 @@ class DirectoryHandler(BaseHandler): room_id = yield self._delete_association(room_alias) try: - yield self.send_room_alias_update_event( - requester, - room_id - ) + if send_event: + yield self.send_room_alias_update_event( + requester, + room_id + ) yield self._update_canonical_alias( requester, @@ -373,9 +397,9 @@ class DirectoryHandler(BaseHandler): room_id (str) visibility (str): "public" or "private" """ - if not self.spam_checker.user_may_publish_room( - requester.user.to_string(), room_id - ): + user_id = requester.user.to_string() + + if not self.spam_checker.user_may_publish_room(user_id, room_id): raise AuthError( 403, "This user is not permitted to publish rooms to the room list" @@ -393,7 +417,24 @@ class DirectoryHandler(BaseHandler): yield self.auth.check_can_change_room_list(room_id, requester.user) - yield self.store.set_room_is_public(room_id, visibility == "public") + making_public = visibility == "public" + if making_public: + room_aliases = yield self.store.get_aliases_for_room(room_id) + canonical_alias = yield self.store.get_canonical_alias_for_room(room_id) + if canonical_alias: + room_aliases.append(canonical_alias) + + if not self.config.is_publishing_room_allowed( + user_id, room_id, room_aliases, + ): + # Lets just return a generic message, as there may be all sorts of + # reasons why we said no. TODO: Allow configurable error messages + # per alias creation rule? 
+ raise SynapseError( + 403, "Not allowed to publish room", + ) + + yield self.store.set_room_is_public(room_id, making_public) @defer.inlineCallbacks def edit_published_appservice_room_list(self, appservice_id, network_id, diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 5edb3cfe04..7bc174070e 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -19,7 +19,13 @@ from six import iteritems from twisted.internet import defer -from synapse.api.errors import RoomKeysVersionError, StoreError, SynapseError +from synapse.api.errors import ( + Codes, + NotFoundError, + RoomKeysVersionError, + StoreError, + SynapseError, +) from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -55,6 +61,8 @@ class E2eRoomKeysHandler(object): room_id(string): room ID to get keys for, for None to get keys for all rooms session_id(string): session ID to get keys for, for None to get keys for all sessions + Raises: + NotFoundError: if the backup version does not exist Returns: A deferred list of dicts giving the session_data and message metadata for these room keys. 
@@ -63,13 +71,19 @@ class E2eRoomKeysHandler(object): # we deliberately take the lock to get keys so that changing the version # works atomically with (yield self._upload_linearizer.queue(user_id)): + # make sure the backup version exists + try: + yield self.store.get_e2e_room_keys_version_info(user_id, version) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown backup version") + else: + raise + results = yield self.store.get_e2e_room_keys( user_id, version, room_id, session_id ) - if results['rooms'] == {}: - raise SynapseError(404, "No room_keys found") - defer.returnValue(results) @defer.inlineCallbacks @@ -120,7 +134,7 @@ class E2eRoomKeysHandler(object): } Raises: - SynapseError: with code 404 if there are no versions defined + NotFoundError: if there are no versions defined RoomKeysVersionError: if the uploaded version is not the current version """ @@ -134,7 +148,7 @@ class E2eRoomKeysHandler(object): version_info = yield self.store.get_e2e_room_keys_version_info(user_id) except StoreError as e: if e.code == 404: - raise SynapseError(404, "Version '%s' not found" % (version,)) + raise NotFoundError("Version '%s' not found" % (version,)) else: raise @@ -148,7 +162,7 @@ class E2eRoomKeysHandler(object): raise RoomKeysVersionError(current_version=version_info['version']) except StoreError as e: if e.code == 404: - raise SynapseError(404, "Version '%s' not found" % (version,)) + raise NotFoundError("Version '%s' not found" % (version,)) else: raise @@ -259,7 +273,7 @@ class E2eRoomKeysHandler(object): version(str): Optional; if None gives the most recent version otherwise a historical one. Raises: - StoreError: code 404 if the requested backup version doesn't exist + NotFoundError: if the requested backup version doesn't exist Returns: A deferred of a info dict that gives the info about the new version. 
@@ -271,7 +285,13 @@ class E2eRoomKeysHandler(object): """ with (yield self._upload_linearizer.queue(user_id)): - res = yield self.store.get_e2e_room_keys_version_info(user_id, version) + try: + res = yield self.store.get_e2e_room_keys_version_info(user_id, version) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown backup version") + else: + raise defer.returnValue(res) @defer.inlineCallbacks @@ -282,8 +302,60 @@ class E2eRoomKeysHandler(object): user_id(str): the user whose current backup version we're deleting version(str): the version id of the backup being deleted Raises: - StoreError: code 404 if this backup version doesn't exist + NotFoundError: if this backup version doesn't exist """ with (yield self._upload_linearizer.queue(user_id)): - yield self.store.delete_e2e_room_keys_version(user_id, version) + try: + yield self.store.delete_e2e_room_keys_version(user_id, version) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown backup version") + else: + raise + + @defer.inlineCallbacks + def update_version(self, user_id, version, version_info): + """Update the info about a given version of the user's backup + + Args: + user_id(str): the user whose current backup version we're updating + version(str): the backup version we're updating + version_info(dict): the new information about the backup + Raises: + NotFoundError: if the requested backup version doesn't exist + Returns: + A deferred of an empty dict. 
+ """ + if "version" not in version_info: + raise SynapseError( + 400, + "Missing version in body", + Codes.MISSING_PARAM + ) + if version_info["version"] != version: + raise SynapseError( + 400, + "Version in body does not match", + Codes.INVALID_PARAM + ) + with (yield self._upload_linearizer.queue(user_id)): + try: + old_info = yield self.store.get_e2e_room_keys_version_info( + user_id, version + ) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown backup version") + else: + raise + if old_info["algorithm"] != version_info["algorithm"]: + raise SynapseError( + 400, + "Algorithm does not match", + Codes.INVALID_PARAM + ) + + yield self.store.update_e2e_room_keys_version(user_id, version, version_info) + + defer.returnValue({}) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cd5b9bbb19..f80486102a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -34,6 +34,7 @@ from synapse.api.constants import ( EventTypes, Membership, RejectedReason, + RoomVersions, ) from synapse.api.errors import ( AuthError, @@ -43,10 +44,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) -from synapse.crypto.event_signing import ( - add_hashes_and_signatures, - compute_event_signature, -) +from synapse.crypto.event_signing import compute_event_signature from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -58,7 +56,6 @@ from synapse.types import UserID, get_domain_from_id from synapse.util import logcontext, unwrapFirstError from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room -from synapse.util.frozenutils import unfreeze from synapse.util.logutils import log_function from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server @@ -105,7 +102,7 @@ class FederationHandler(BaseHandler): self.hs = 
hs - self.store = hs.get_datastore() # type: synapse.storage.DataStore + self.store = hs.get_datastore() self.federation_client = hs.get_federation_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname @@ -202,27 +199,22 @@ class FederationHandler(BaseHandler): self.room_queues[room_id].append((pdu, origin)) return - # If we're no longer in the room just ditch the event entirely. This - # is probably an old server that has come back and thinks we're still - # in the room (or we've been rejoined to the room by a state reset). + # If we're not in the room just ditch the event entirely. This is + # probably an old server that has come back and thinks we're still in + # the room (or we've been rejoined to the room by a state reset). # - # If we were never in the room then maybe our database got vaped and - # we should check if we *are* in fact in the room. If we are then we - # can magically rejoin the room. + # Note that if we were never in the room then we would have already + # dropped the event, since we wouldn't know the room version. 
is_in_room = yield self.auth.check_host_in_room( room_id, self.server_name ) if not is_in_room: - was_in_room = yield self.store.was_host_joined( - pdu.room_id, self.server_name, + logger.info( + "[%s %s] Ignoring PDU from %s as we're not in the room", + room_id, event_id, origin, ) - if was_in_room: - logger.info( - "[%s %s] Ignoring PDU from %s as we've left the room", - room_id, event_id, origin, - ) - defer.returnValue(None) + defer.returnValue(None) state = None auth_chain = [] @@ -239,7 +231,7 @@ class FederationHandler(BaseHandler): room_id, event_id, min_depth, ) - prevs = {e_id for e_id, _ in pdu.prev_events} + prevs = set(pdu.prev_event_ids()) seen = yield self.store.have_seen_events(prevs) if min_depth and pdu.depth < min_depth: @@ -347,6 +339,8 @@ class FederationHandler(BaseHandler): room_id, event_id, p, ) + room_version = yield self.store.get_room_version(room_id) + with logcontext.nested_logging_context(p): # note that if any of the missing prevs share missing state or # auth events, the requests to fetch those events are deduped @@ -360,7 +354,7 @@ class FederationHandler(BaseHandler): # we want the state *after* p; get_state_for_room returns the # state *before* p. remote_event = yield self.federation_client.get_pdu( - [origin], p, outlier=True, + [origin], p, room_version, outlier=True, ) if remote_event is None: @@ -384,7 +378,6 @@ class FederationHandler(BaseHandler): for x in remote_state: event_map[x.event_id] = x - room_version = yield self.store.get_room_version(room_id) state_map = yield resolve_events_with_store( room_version, state_maps, event_map, state_res_store=StateResolutionStore(self.store), @@ -557,86 +550,54 @@ class FederationHandler(BaseHandler): room_id, event_id, event, ) - # FIXME (erikj): Awful hack to make the case where we are not currently - # in the room work - # If state and auth_chain are None, then we don't need to do this check - # as we already know we have enough state in the DB to handle this - # event. 
- if state and auth_chain and not event.internal_metadata.is_outlier(): - is_in_room = yield self.auth.check_host_in_room( - room_id, - self.server_name - ) - else: - is_in_room = True - - if not is_in_room: - logger.info( - "[%s %s] Got event for room we're not in", - room_id, event_id, - ) + event_ids = set() + if state: + event_ids |= {e.event_id for e in state} + if auth_chain: + event_ids |= {e.event_id for e in auth_chain} - try: - yield self._persist_auth_tree( - origin, auth_chain, state, event - ) - except AuthError as e: - raise FederationError( - "ERROR", - e.code, - e.msg, - affected=event_id, - ) - - else: - event_ids = set() - if state: - event_ids |= {e.event_id for e in state} - if auth_chain: - event_ids |= {e.event_id for e in auth_chain} + seen_ids = yield self.store.have_seen_events(event_ids) - seen_ids = yield self.store.have_seen_events(event_ids) + if state and auth_chain is not None: + # If we have any state or auth_chain given to us by the replication + # layer, then we should handle them (if we haven't before.) - if state and auth_chain is not None: - # If we have any state or auth_chain given to us by the replication - # layer, then we should handle them (if we haven't before.) 
+ event_infos = [] - event_infos = [] - - for e in itertools.chain(auth_chain, state): - if e.event_id in seen_ids: - continue - e.internal_metadata.outlier = True - auth_ids = [e_id for e_id, _ in e.auth_events] - auth = { - (e.type, e.state_key): e for e in auth_chain - if e.event_id in auth_ids or e.type == EventTypes.Create - } - event_infos.append({ - "event": e, - "auth_events": auth, - }) - seen_ids.add(e.event_id) + for e in itertools.chain(auth_chain, state): + if e.event_id in seen_ids: + continue + e.internal_metadata.outlier = True + auth_ids = e.auth_event_ids() + auth = { + (e.type, e.state_key): e for e in auth_chain + if e.event_id in auth_ids or e.type == EventTypes.Create + } + event_infos.append({ + "event": e, + "auth_events": auth, + }) + seen_ids.add(e.event_id) - logger.info( - "[%s %s] persisting newly-received auth/state events %s", - room_id, event_id, [e["event"].event_id for e in event_infos] - ) - yield self._handle_new_events(origin, event_infos) + logger.info( + "[%s %s] persisting newly-received auth/state events %s", + room_id, event_id, [e["event"].event_id for e in event_infos] + ) + yield self._handle_new_events(origin, event_infos) - try: - context = yield self._handle_new_event( - origin, - event, - state=state, - ) - except AuthError as e: - raise FederationError( - "ERROR", - e.code, - e.msg, - affected=event.event_id, - ) + try: + context = yield self._handle_new_event( + origin, + event, + state=state, + ) + except AuthError as e: + raise FederationError( + "ERROR", + e.code, + e.msg, + affected=event.event_id, + ) room = yield self.store.get_room(room_id) @@ -692,6 +653,8 @@ class FederationHandler(BaseHandler): if dest == self.server_name: raise SynapseError(400, "Can't backfill from self.") + room_version = yield self.store.get_room_version(room_id) + events = yield self.federation_client.backfill( dest, room_id, @@ -726,7 +689,7 @@ class FederationHandler(BaseHandler): edges = [ ev.event_id for ev in events - if 
set(e_id for e_id, _ in ev.prev_events) - event_ids + if set(ev.prev_event_ids()) - event_ids ] logger.info( @@ -753,7 +716,7 @@ class FederationHandler(BaseHandler): required_auth = set( a_id for event in events + list(state_events.values()) + list(auth_events.values()) - for a_id, _ in event.auth_events + for a_id in event.auth_event_ids() ) auth_events.update({ e_id: event_map[e_id] for e_id in required_auth if e_id in event_map @@ -769,7 +732,7 @@ class FederationHandler(BaseHandler): auth_events.update(ret_events) required_auth.update( - a_id for event in ret_events.values() for a_id, _ in event.auth_events + a_id for event in ret_events.values() for a_id in event.auth_event_ids() ) missing_auth = required_auth - set(auth_events) @@ -785,6 +748,7 @@ class FederationHandler(BaseHandler): self.federation_client.get_pdu, [dest], event_id, + room_version=room_version, outlier=True, timeout=10000, ) @@ -796,7 +760,7 @@ class FederationHandler(BaseHandler): required_auth.update( a_id for event in results if event - for a_id, _ in event.auth_events + for a_id in event.auth_event_ids() ) missing_auth = required_auth - set(auth_events) @@ -806,29 +770,52 @@ class FederationHandler(BaseHandler): set(auth_events.keys()) | set(state_events.keys()) ) + # We now have a chunk of events plus associated state and auth chain to + # persist. We do the persistence in two steps: + # 1. Auth events and state get persisted as outliers, plus the + # backward extremities get persisted (as non-outliers). + # 2. The rest of the events in the chunk get persisted one by one, as + # each one depends on the previous event for its state. + # + # The important thing is that events in the chunk get persisted as + # non-outliers, including when those events are also in the state or + # auth chain. Caution must therefore be taken to ensure that they are + # not accidentally marked as outliers. 
+ + # Step 1a: persist auth events that *don't* appear in the chunk ev_infos = [] for a in auth_events.values(): - if a.event_id in seen_events: + # We only want to persist auth events as outliers that we haven't + # seen and aren't about to persist as part of the backfilled chunk. + if a.event_id in seen_events or a.event_id in event_map: continue + a.internal_metadata.outlier = True ev_infos.append({ "event": a, "auth_events": { (auth_events[a_id].type, auth_events[a_id].state_key): auth_events[a_id] - for a_id, _ in a.auth_events + for a_id in a.auth_event_ids() if a_id in auth_events } }) + # Step 1b: persist the events in the chunk we fetched state for (i.e. + # the backwards extremities) as non-outliers. for e_id in events_to_state: + # For paranoia we ensure that these events are marked as + # non-outliers + ev = event_map[e_id] + assert(not ev.internal_metadata.is_outlier()) + ev_infos.append({ - "event": event_map[e_id], + "event": ev, "state": events_to_state[e_id], "auth_events": { (auth_events[a_id].type, auth_events[a_id].state_key): auth_events[a_id] - for a_id, _ in event_map[e_id].auth_events + for a_id in ev.auth_event_ids() if a_id in auth_events } }) @@ -838,12 +825,17 @@ class FederationHandler(BaseHandler): backfilled=True, ) + # Step 2: Persist the rest of the events in the chunk one by one events.sort(key=lambda e: e.depth) for event in events: if event in events_to_state: continue + # For paranoia we ensure that these events are marked as + # non-outliers + assert(not event.internal_metadata.is_outlier()) + # We store these one at a time since each event depends on the # previous to work out the state. # TODO: We can probably do something more clever here. 
@@ -1041,17 +1033,17 @@ class FederationHandler(BaseHandler): Raises: SynapseError if the event does not pass muster """ - if len(ev.prev_events) > 20: + if len(ev.prev_event_ids()) > 20: logger.warn("Rejecting event %s which has %i prev_events", - ev.event_id, len(ev.prev_events)) + ev.event_id, len(ev.prev_event_ids())) raise SynapseError( http_client.BAD_REQUEST, "Too many prev_events", ) - if len(ev.auth_events) > 10: + if len(ev.auth_event_ids()) > 10: logger.warn("Rejecting event %s which has %i auth_events", - ev.event_id, len(ev.auth_events)) + ev.event_id, len(ev.auth_event_ids())) raise SynapseError( http_client.BAD_REQUEST, "Too many auth_events", @@ -1076,7 +1068,7 @@ class FederationHandler(BaseHandler): def on_event_auth(self, event_id): event = yield self.store.get_event(event_id) auth = yield self.store.get_auth_chain( - [auth_id for auth_id, _ in event.auth_events], + [auth_id for auth_id in event.auth_event_ids()], include_given=True ) defer.returnValue([e for e in auth]) @@ -1097,7 +1089,7 @@ class FederationHandler(BaseHandler): """ logger.debug("Joining %s to %s", joinee, room_id) - origin, event = yield self._make_and_verify_event( + origin, event, event_format_version = yield self._make_and_verify_event( target_hosts, room_id, joinee, @@ -1120,7 +1112,6 @@ class FederationHandler(BaseHandler): handled_events = set() try: - event = self._sign_event(event) # Try the host we successfully got a response to /make_join/ # request first. 
try: @@ -1128,7 +1119,9 @@ class FederationHandler(BaseHandler): target_hosts.insert(0, origin) except ValueError: pass - ret = yield self.federation_client.send_join(target_hosts, event) + ret = yield self.federation_client.send_join( + target_hosts, event, event_format_version, + ) origin = ret["origin"] state = ret["state"] @@ -1201,13 +1194,18 @@ class FederationHandler(BaseHandler): """ event_content = {"membership": Membership.JOIN} - builder = self.event_builder_factory.new({ - "type": EventTypes.Member, - "content": event_content, - "room_id": room_id, - "sender": user_id, - "state_key": user_id, - }) + room_version = yield self.store.get_room_version(room_id) + + builder = self.event_builder_factory.new( + room_version, + { + "type": EventTypes.Member, + "content": event_content, + "room_id": room_id, + "sender": user_id, + "state_key": user_id, + } + ) try: event, context = yield self.event_creation_handler.create_new_client_event( @@ -1219,7 +1217,9 @@ class FederationHandler(BaseHandler): # The remote hasn't signed it yet, obviously. 
We'll do the full checks # when we get the event back in `on_send_join_request` - yield self.auth.check_from_context(event, context, do_sig_check=False) + yield self.auth.check_from_context( + room_version, event, context, do_sig_check=False, + ) defer.returnValue(event) @@ -1324,11 +1324,11 @@ class FederationHandler(BaseHandler): ) event.internal_metadata.outlier = True - event.internal_metadata.invite_from_remote = True + event.internal_metadata.out_of_band_membership = True event.signatures.update( compute_event_signature( - event, + event.get_pdu_json(), self.hs.hostname, self.hs.config.signing_key[0] ) @@ -1341,7 +1341,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def do_remotely_reject_invite(self, target_hosts, room_id, user_id): - origin, event = yield self._make_and_verify_event( + origin, event, event_format_version = yield self._make_and_verify_event( target_hosts, room_id, user_id, @@ -1350,7 +1350,7 @@ class FederationHandler(BaseHandler): # Mark as outlier as we don't have any state for this event; we're not # even in the room. event.internal_metadata.outlier = True - event = self._sign_event(event) + event.internal_metadata.out_of_band_membership = True # Try the host that we succesfully called /make_leave/ on first for # the /send_leave/ request. @@ -1373,7 +1373,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _make_and_verify_event(self, target_hosts, room_id, user_id, membership, content={}, params=None): - origin, pdu = yield self.federation_client.make_membership_event( + origin, event, format_ver = yield self.federation_client.make_membership_event( target_hosts, room_id, user_id, @@ -1382,9 +1382,7 @@ class FederationHandler(BaseHandler): params=params, ) - logger.debug("Got response to make_%s: %s", membership, pdu) - - event = pdu + logger.debug("Got response to make_%s: %s", membership, event) # We should assert some things. 
# FIXME: Do this in a nicer way @@ -1392,28 +1390,7 @@ class FederationHandler(BaseHandler): assert(event.user_id == user_id) assert(event.state_key == user_id) assert(event.room_id == room_id) - defer.returnValue((origin, event)) - - def _sign_event(self, event): - event.internal_metadata.outlier = False - - builder = self.event_builder_factory.new( - unfreeze(event.get_pdu_json()) - ) - - builder.event_id = self.event_builder_factory.create_event_id() - builder.origin = self.hs.hostname - - if not hasattr(event, "signatures"): - builder.signatures = {} - - add_hashes_and_signatures( - builder, - self.hs.hostname, - self.hs.config.signing_key[0], - ) - - return builder.build() + defer.returnValue((origin, event, format_ver)) @defer.inlineCallbacks @log_function @@ -1422,13 +1399,17 @@ class FederationHandler(BaseHandler): leave event for the room and return that. We do *not* persist or process it until the other server has signed it and sent it back. """ - builder = self.event_builder_factory.new({ - "type": EventTypes.Member, - "content": {"membership": Membership.LEAVE}, - "room_id": room_id, - "sender": user_id, - "state_key": user_id, - }) + room_version = yield self.store.get_room_version(room_id) + builder = self.event_builder_factory.new( + room_version, + { + "type": EventTypes.Member, + "content": {"membership": Membership.LEAVE}, + "room_id": room_id, + "sender": user_id, + "state_key": user_id, + } + ) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, @@ -1437,7 +1418,9 @@ class FederationHandler(BaseHandler): try: # The remote hasn't signed it yet, obviously. 
We'll do the full checks # when we get the event back in `on_send_leave_request` - yield self.auth.check_from_context(event, context, do_sig_check=False) + yield self.auth.check_from_context( + room_version, event, context, do_sig_check=False, + ) except AuthError as e: logger.warn("Failed to create new leave %r because %s", event, e) raise e @@ -1696,9 +1679,16 @@ class FederationHandler(BaseHandler): create_event = e break + if create_event is None: + # If the state doesn't have a create event then the room is + # invalid, and it would fail auth checks anyway. + raise SynapseError(400, "No create event in state") + + room_version = create_event.content.get("room_version", RoomVersions.V1) + missing_auth_events = set() for e in itertools.chain(auth_events, state, [event]): - for e_id, _ in e.auth_events: + for e_id in e.auth_event_ids(): if e_id not in event_map: missing_auth_events.add(e_id) @@ -1706,6 +1696,7 @@ class FederationHandler(BaseHandler): m_ev = yield self.federation_client.get_pdu( [origin], e_id, + room_version=room_version, outlier=True, timeout=10000, ) @@ -1717,14 +1708,14 @@ class FederationHandler(BaseHandler): for e in itertools.chain(auth_events, state, [event]): auth_for_e = { (event_map[e_id].type, event_map[e_id].state_key): event_map[e_id] - for e_id, _ in e.auth_events + for e_id in e.auth_event_ids() if e_id in event_map } if create_event: auth_for_e[(EventTypes.Create, "")] = create_event try: - self.auth.check(e, auth_events=auth_for_e) + self.auth.check(room_version, e, auth_events=auth_for_e) except SynapseError as err: # we may get SynapseErrors here as well as AuthErrors. For # instance, there are a couple of (ancient) events in some @@ -1785,10 +1776,10 @@ class FederationHandler(BaseHandler): # This is a hack to fix some old rooms where the initial join event # didn't reference the create event in its auth events. 
- if event.type == EventTypes.Member and not event.auth_events: - if len(event.prev_events) == 1 and event.depth < 5: + if event.type == EventTypes.Member and not event.auth_event_ids(): + if len(event.prev_event_ids()) == 1 and event.depth < 5: c = yield self.store.get_event( - event.prev_events[0][0], + event.prev_event_ids()[0], allow_none=True, ) if c and c.type == EventTypes.Create: @@ -1835,7 +1826,7 @@ class FederationHandler(BaseHandler): # Now get the current auth_chain for the event. local_auth_chain = yield self.store.get_auth_chain( - [auth_id for auth_id, _ in event.auth_events], + [auth_id for auth_id in event.auth_event_ids()], include_given=True ) @@ -1891,7 +1882,7 @@ class FederationHandler(BaseHandler): """ # Check if we have all the auth events. current_state = set(e.event_id for e in auth_events.values()) - event_auth_events = set(e_id for e_id, _ in event.auth_events) + event_auth_events = set(event.auth_event_ids()) if event.is_state(): event_key = (event.type, event.state_key) @@ -1935,7 +1926,7 @@ class FederationHandler(BaseHandler): continue try: - auth_ids = [e_id for e_id, _ in e.auth_events] + auth_ids = e.auth_event_ids() auth = { (e.type, e.state_key): e for e in remote_auth_chain if e.event_id in auth_ids or e.type == EventTypes.Create @@ -1956,7 +1947,7 @@ class FederationHandler(BaseHandler): pass have_events = yield self.store.get_seen_events_with_rejections( - [e_id for e_id, _ in event.auth_events] + event.auth_event_ids() ) seen_events = set(have_events.keys()) except Exception: @@ -1968,6 +1959,8 @@ class FederationHandler(BaseHandler): current_state = set(e.event_id for e in auth_events.values()) different_auth = event_auth_events - current_state + room_version = yield self.store.get_room_version(event.room_id) + if different_auth and not event.internal_metadata.is_outlier(): # Do auth conflict res. 
logger.info("Different auth: %s", different_auth) @@ -1992,8 +1985,6 @@ class FederationHandler(BaseHandler): (d.type, d.state_key): d for d in different_events if d }) - room_version = yield self.store.get_room_version(event.room_id) - new_state = yield self.state_handler.resolve_events( room_version, [list(local_view.values()), list(remote_view.values())], @@ -2058,7 +2049,7 @@ class FederationHandler(BaseHandler): continue try: - auth_ids = [e_id for e_id, _ in ev.auth_events] + auth_ids = ev.auth_event_ids() auth = { (e.type, e.state_key): e for e in result["auth_chain"] @@ -2093,7 +2084,7 @@ class FederationHandler(BaseHandler): ) try: - self.auth.check(event, auth_events=auth_events) + self.auth.check(room_version, event, auth_events=auth_events) except AuthError as e: logger.warn("Failed auth resolution for %r because %s", event, e) raise e @@ -2250,7 +2241,7 @@ class FederationHandler(BaseHandler): missing_remote_ids = [e.event_id for e in missing_remotes] base_remote_rejected = list(missing_remotes) for e in missing_remotes: - for e_id, _ in e.auth_events: + for e_id in e.auth_event_ids(): if e_id in missing_remote_ids: try: base_remote_rejected.remove(e) @@ -2316,18 +2307,26 @@ class FederationHandler(BaseHandler): } if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): - builder = self.event_builder_factory.new(event_dict) - EventValidator().validate_new(builder) + room_version = yield self.store.get_room_version(room_id) + builder = self.event_builder_factory.new(room_version, event_dict) + + EventValidator().validate_builder(builder) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder ) event, context = yield self.add_display_name_to_third_party_invite( - event_dict, event, context + room_version, event_dict, event, context ) + EventValidator().validate_new(event) + + # We need to tell the transaction queue to send this out, even + # though the sender isn't a local user. 
+ event.internal_metadata.send_on_behalf_of = self.hs.hostname + try: - yield self.auth.check_from_context(event, context) + yield self.auth.check_from_context(room_version, event, context) except AuthError as e: logger.warn("Denying new third party invite %r because %s", event, e) raise e @@ -2354,23 +2353,31 @@ class FederationHandler(BaseHandler): Returns: Deferred: resolves (to None) """ - builder = self.event_builder_factory.new(event_dict) + room_version = yield self.store.get_room_version(room_id) + + # NB: event_dict has a particular specced format we might need to fudge + # if we change event formats too much. + builder = self.event_builder_factory.new(room_version, event_dict) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) event, context = yield self.add_display_name_to_third_party_invite( - event_dict, event, context + room_version, event_dict, event, context ) try: - self.auth.check_from_context(event, context) + self.auth.check_from_context(room_version, event, context) except AuthError as e: logger.warn("Denying third party invite %r because %s", event, e) raise e yield self._check_signature(event, context) + # We need to tell the transaction queue to send this out, even + # though the sender isn't a local user. + event.internal_metadata.send_on_behalf_of = get_domain_from_id(event.sender) + # XXX we send the invite here, but send_membership_event also sends it, # so we end up making two requests. I think this is redundant. 
returned_invite = yield self.send_invite(origin, event) @@ -2381,7 +2388,8 @@ class FederationHandler(BaseHandler): yield member_handler.send_membership_event(None, event, context) @defer.inlineCallbacks - def add_display_name_to_third_party_invite(self, event_dict, event, context): + def add_display_name_to_third_party_invite(self, room_version, event_dict, + event, context): key = ( EventTypes.ThirdPartyInvite, event.content["third_party_invite"]["signed"]["token"] @@ -2405,11 +2413,12 @@ class FederationHandler(BaseHandler): # auth checks. If we need the invite and don't have it then the # auth check code will explode appropriately. - builder = self.event_builder_factory.new(event_dict) - EventValidator().validate_new(builder) + builder = self.event_builder_factory.new(room_version, event_dict) + EventValidator().validate_builder(builder) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) + EventValidator().validate_new(event) defer.returnValue((event, context)) @defer.inlineCallbacks diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 173315af6c..02c508acec 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -20,7 +20,7 @@ from six import iteritems from twisted.internet import defer -from synapse.api.errors import HttpResponseException, SynapseError +from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError from synapse.types import get_domain_from_id logger = logging.getLogger(__name__) @@ -46,13 +46,19 @@ def _create_rerouter(func_name): # when the remote end responds with things like 403 Not # In Group, we can communicate that to the client instead # of a 500. 
- def h(failure): + def http_response_errback(failure): failure.trap(HttpResponseException) e = failure.value if e.code == 403: raise e.to_synapse_error() return failure - d.addErrback(h) + + def request_failed_errback(failure): + failure.trap(RequestSendFailed) + raise SynapseError(502, "Failed to contact group server") + + d.addErrback(http_response_errback) + d.addErrback(request_failed_errback) return d return f diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 5feb3f22a6..39184f0e22 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -167,18 +167,21 @@ class IdentityHandler(BaseHandler): "mxid": mxid, "threepid": threepid, } - headers = {} + # we abuse the federation http client to sign the request, but we have to send it # using the normal http client since we don't want the SRV lookup and want normal # 'browser-like' HTTPS. - self.federation_http_client.sign_request( + auth_headers = self.federation_http_client.build_auth_headers( destination=None, method='POST', url_bytes='/_matrix/identity/api/v1/3pid/unbind'.encode('ascii'), - headers_dict=headers, content=content, destination_is=id_server, ) + headers = { + b"Authorization": auth_headers, + } + try: yield self.http_client.post_json_get_json( url, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 969e588e73..3981fe69ce 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -22,7 +22,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer from twisted.internet.defer import succeed -from synapse.api.constants import MAX_DEPTH, EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, RoomVersions from synapse.api.errors import ( AuthError, Codes, @@ -31,7 +31,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.api.urls import ConsentURIBuilder -from synapse.crypto.event_signing import add_hashes_and_signatures from 
synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.replication.http.send_event import ReplicationSendEventRestServlet @@ -278,9 +277,17 @@ class EventCreationHandler(object): """ yield self.auth.check_auth_blocking(requester.user.to_string()) - builder = self.event_builder_factory.new(event_dict) + if event_dict["type"] == EventTypes.Create and event_dict["state_key"] == "": + room_version = event_dict["content"]["room_version"] + else: + try: + room_version = yield self.store.get_room_version(event_dict["room_id"]) + except NotFoundError: + raise AuthError(403, "Unknown room") + + builder = self.event_builder_factory.new(room_version, event_dict) - self.validator.validate_new(builder) + self.validator.validate_builder(builder) if builder.type == EventTypes.Member: membership = builder.content.get("membership", None) @@ -318,6 +325,8 @@ class EventCreationHandler(object): prev_events_and_hashes=prev_events_and_hashes, ) + self.validator.validate_new(event) + defer.returnValue((event, context)) def _is_exempt_from_privacy_policy(self, builder, requester): @@ -427,6 +436,9 @@ class EventCreationHandler(object): if event.is_state(): prev_state = yield self.deduplicate_state_event(event, context) + logger.info( + "Not bothering to persist duplicate state event %s", event.event_id, + ) if prev_state is not None: defer.returnValue(prev_state) @@ -532,40 +544,19 @@ class EventCreationHandler(object): prev_events_and_hashes = \ yield self.store.get_prev_events_for_room(builder.room_id) - if prev_events_and_hashes: - depth = max([d for _, _, d in prev_events_and_hashes]) + 1 - # we cap depth of generated events, to ensure that they are not - # rejected by other servers (and so that they can be persisted in - # the db) - depth = min(depth, MAX_DEPTH) - else: - depth = 1 - prev_events = [ (event_id, prev_hashes) for event_id, prev_hashes, _ in prev_events_and_hashes ] - builder.prev_events = prev_events - builder.depth 
= depth - - context = yield self.state.compute_event_context(builder) + event = yield builder.build( + prev_event_ids=[p for p, _ in prev_events], + ) + context = yield self.state.compute_event_context(event) if requester: context.app_service = requester.app_service - if builder.is_state(): - builder.prev_state = yield self.store.add_event_hashes( - context.prev_state_events - ) - - yield self.auth.add_auth_events(builder, context) - - signing_key = self.hs.config.signing_key[0] - add_hashes_and_signatures( - builder, self.server_name, signing_key - ) - - event = builder.build() + self.validator.validate_new(event) logger.debug( "Created event %s", @@ -600,8 +591,13 @@ class EventCreationHandler(object): extra_users (list(UserID)): Any extra users to notify about event """ + if event.is_state() and (event.type, event.state_key) == (EventTypes.Create, ""): + room_version = event.content.get("room_version", RoomVersions.V1) + else: + room_version = yield self.store.get_room_version(event.room_id) + try: - yield self.auth.check_from_context(event, context) + yield self.auth.check_from_context(room_version, event, context) except AuthError as err: logger.warn("Denying new event %r because %s", event, err) raise err @@ -749,7 +745,8 @@ class EventCreationHandler(object): auth_events = { (e.type, e.state_key): e for e in auth_events.values() } - if self.auth.check_redaction(event, auth_events=auth_events): + room_version = yield self.store.get_room_version(event.room_id) + if self.auth.check_redaction(room_version, event, auth_events=auth_events): original_event = yield self.store.get_event( event.redacts, check_redacted=False, @@ -763,6 +760,9 @@ class EventCreationHandler(object): "You don't have permission to redact events" ) + # We've already checked. 
+ event.internal_metadata.recheck_redaction = False + if event.type == EventTypes.Create: prev_state_ids = yield context.get_prev_state_ids(self.store) if prev_state_ids: diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 43f81bd607..e4fdae9266 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -136,7 +136,11 @@ class PaginationHandler(object): logger.info("[purge] complete") self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE except Exception: - logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) + f = Failure() + logger.error( + "[purge] failed", + exc_info=(f.type, f.value, f.getTracebackObject()), + ) self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED finally: self._purges_in_progress_by_room.discard(room_id) @@ -235,6 +239,17 @@ class PaginationHandler(object): "room_key", next_key ) + if events: + if event_filter: + events = event_filter.filter(events) + + events = yield filter_events_for_client( + self.store, + user_id, + events, + is_peeking=(member_event_id is None), + ) + if not events: defer.returnValue({ "chunk": [], @@ -242,18 +257,8 @@ class PaginationHandler(object): "end": next_token.to_string(), }) - if event_filter: - events = event_filter.filter(events) - - events = yield filter_events_for_client( - self.store, - user_id, - events, - is_peeking=(member_event_id is None), - ) - state = None - if event_filter and event_filter.lazy_load_members(): + if event_filter and event_filter.lazy_load_members() and len(events) > 0: # TODO: remove redundant members # FIXME: we also care about invite targets etc. 
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 4c2690ba26..696469732c 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,8 +16,8 @@ import logging from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id -from synapse.util import logcontext from ._base import BaseHandler @@ -59,7 +59,9 @@ class ReceiptsHandler(BaseHandler): if is_new: # fire off a process in the background to send the receipt to # remote servers - self._push_remotes([receipt]) + run_as_background_process( + 'push_receipts_to_remotes', self._push_remotes, receipt + ) @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): @@ -125,44 +127,42 @@ class ReceiptsHandler(BaseHandler): defer.returnValue(True) - @logcontext.preserve_fn # caller should not yield on this @defer.inlineCallbacks - def _push_remotes(self, receipts): - """Given a list of receipts, works out which remote servers should be + def _push_remotes(self, receipt): + """Given a receipt, works out which remote servers should be poked and pokes them. """ try: - # TODO: Some of this stuff should be coallesced. - for receipt in receipts: - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": event_ids, - "data": data, - } + # TODO: optimise this to move some of the work to the workers. 
+ room_id = receipt["room_id"] + receipt_type = receipt["receipt_type"] + user_id = receipt["user_id"] + event_ids = receipt["event_ids"] + data = receipt["data"] + + users = yield self.state.get_current_user_in_room(room_id) + remotedomains = set(get_domain_from_id(u) for u in users) + remotedomains = remotedomains.copy() + remotedomains.discard(self.server_name) + + logger.debug("Sending receipt to: %r", remotedomains) + + for domain in remotedomains: + self.federation.send_edu( + destination=domain, + edu_type="m.receipt", + content={ + room_id: { + receipt_type: { + user_id: { + "event_ids": event_ids, + "data": data, } - }, + } }, - key=(room_id, receipt_type, user_id), - ) + }, + key=(room_id, receipt_type, user_id), + ) except Exception: logger.exception("Error pushing receipts to remote servers") diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index e9d7b25a36..c0e06929bd 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -19,6 +19,7 @@ import logging from twisted.internet import defer from synapse import types +from synapse.api.constants import LoginType from synapse.api.errors import ( AuthError, Codes, @@ -26,7 +27,14 @@ from synapse.api.errors import ( RegistrationError, SynapseError, ) +from synapse.config.server import is_threepid_reserved from synapse.http.client import CaptchaServerHttpClient +from synapse.http.servlet import assert_params_in_dict +from synapse.replication.http.login import RegisterDeviceReplicationServlet +from synapse.replication.http.register import ( + ReplicationPostRegisterActionsServlet, + ReplicationRegisterServlet, +) from synapse.types import RoomAlias, RoomID, UserID, create_requester from synapse.util.async_helpers import Linearizer from synapse.util.threepids import check_3pid_allowed @@ -50,8 +58,8 @@ class RegistrationHandler(BaseHandler): self._auth_handler = hs.get_auth_handler() self.profile_handler = hs.get_profile_handler() self.user_directory_handler = 
hs.get_user_directory_handler() - self.room_creation_handler = self.hs.get_room_creation_handler() self.captcha_client = CaptchaServerHttpClient(hs) + self.identity_handler = self.hs.get_handlers().identity_handler self._next_generated_user_id = None @@ -62,6 +70,18 @@ class RegistrationHandler(BaseHandler): ) self._server_notices_mxid = hs.config.server_notices_mxid + if hs.config.worker_app: + self._register_client = ReplicationRegisterServlet.make_client(hs) + self._register_device_client = ( + RegisterDeviceReplicationServlet.make_client(hs) + ) + self._post_registration_client = ( + ReplicationPostRegisterActionsServlet.make_client(hs) + ) + else: + self.device_handler = hs.get_device_handler() + self.pusher_pool = hs.get_pusherpool() + @defer.inlineCallbacks def check_username(self, localpart, guest_access_token=None, assigned_user_id=None): @@ -127,6 +147,8 @@ class RegistrationHandler(BaseHandler): make_guest=False, admin=False, threepid=None, + user_type=None, + default_display_name=None, ): """Registers a new client on the server. @@ -141,6 +163,10 @@ class RegistrationHandler(BaseHandler): since it offers no means of associating a device_id with the access_token. Instead you should call auth_handler.issue_access_token after registration. + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. + default_display_name (unicode|None): if set, the new user's displayname + will be set to this. Defaults to 'localpart'. Returns: A tuple of (user_id, access_token). 
Raises: @@ -150,7 +176,7 @@ class RegistrationHandler(BaseHandler): yield self.auth.check_auth_blocking(threepid=threepid) password_hash = None if password: - password_hash = yield self.auth_handler().hash(password) + password_hash = yield self._auth_handler.hash(password) if localpart: yield self.check_username(localpart, guest_access_token=guest_access_token) @@ -170,20 +196,25 @@ class RegistrationHandler(BaseHandler): user = UserID(localpart, self.hs.hostname) user_id = user.to_string() + if was_guest: + # If the user was a guest then they already have a profile + default_display_name = None + + elif default_display_name is None: + default_display_name = localpart + token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) - yield self.store.register( + yield self._register_with_store( user_id=user_id, token=token, password_hash=password_hash, was_guest=was_guest, make_guest=make_guest, - create_profile_with_localpart=( - # If the user was a guest then they already have a profile - None if was_guest else user.localpart - ), + create_profile_with_displayname=default_display_name, admin=admin, + user_type=user_type, ) if self.hs.config.user_directory_search_all_users: @@ -204,13 +235,15 @@ class RegistrationHandler(BaseHandler): yield self.check_user_id_not_appservice_exclusive(user_id) if generate_token: token = self.macaroon_gen.generate_access_token(user_id) + if default_display_name is None: + default_display_name = localpart try: - yield self.store.register( + yield self._register_with_store( user_id=user_id, token=token, password_hash=password_hash, make_guest=make_guest, - create_profile_with_localpart=user.localpart, + create_profile_with_displayname=default_display_name, ) except SynapseError: # if user id is taken, just generate another @@ -218,16 +251,34 @@ class RegistrationHandler(BaseHandler): user_id = None token = None attempts += 1 + if not self.hs.config.user_consent_at_registration: + yield 
self._auto_join_rooms(user_id) + + defer.returnValue((user_id, token)) + + @defer.inlineCallbacks + def _auto_join_rooms(self, user_id): + """Automatically joins users to auto join rooms - creating the room in the first place + if the user is the first to be created. + Args: + user_id(str): The user to join + """ # auto-join the user to any rooms we're supposed to dump them into fake_requester = create_requester(user_id) - # try to create the room if we're the first user on the server + # try to create the room if we're the first real user on the server. Note + # that an auto-generated support user is not a real user and will never be + # the user to create the room should_auto_create_rooms = False - if self.hs.config.autocreate_auto_join_rooms: + is_support = yield self.store.is_support_user(user_id) + # There is an edge case where the first user is the support user, then + # the room is never created, though this seems unlikely and + # recoverable from given the support user being involved in the first + # place. + if self.hs.config.autocreate_auto_join_rooms and not is_support: count = yield self.store.count_all_users() should_auto_create_rooms = count == 1 - for r in self.hs.config.auto_join_rooms: try: if should_auto_create_rooms: @@ -241,7 +292,10 @@ class RegistrationHandler(BaseHandler): else: # create room expects the localpart of the room alias room_alias_localpart = room_alias.localpart - yield self.room_creation_handler.create_room( + + # getting the RoomCreationHandler during init gives a dependency + # loop + yield self.hs.get_room_creation_handler().create_room( fake_requester, config={ "preset": "public_chat", @@ -254,10 +308,15 @@ class RegistrationHandler(BaseHandler): except Exception as e: logger.error("Failed to join new user to %r: %r", r, e) - # We used to generate default identicons here, but nowadays - # we want clients to generate their own as part of their branding - # rather than there being consistent matrix-wide ones, so we don't. 
- defer.returnValue((user_id, token)) + @defer.inlineCallbacks + def post_consent_actions(self, user_id): + """A series of registration actions that can only be carried out once consent + has been granted + + Args: + user_id (str): The user to join + """ + yield self._auto_join_rooms(user_id) @defer.inlineCallbacks def appservice_register(self, user_localpart, as_token): @@ -278,11 +337,11 @@ class RegistrationHandler(BaseHandler): user_id, allowed_appservice=service ) - yield self.store.register( + yield self._register_with_store( user_id=user_id, password_hash="", appservice_id=service_id, - create_profile_with_localpart=user.localpart, + create_profile_with_displayname=user.localpart, ) defer.returnValue(user_id) @@ -310,35 +369,6 @@ class RegistrationHandler(BaseHandler): logger.info("Valid captcha entered from %s", ip) @defer.inlineCallbacks - def register_saml2(self, localpart): - """ - Registers email_id as SAML2 Based Auth. - """ - if types.contains_invalid_mxid_characters(localpart): - raise SynapseError( - 400, - "User ID can only contain characters a-z, 0-9, or '=_-./'", - ) - yield self.auth.check_auth_blocking() - user = UserID(localpart, self.hs.hostname) - user_id = user.to_string() - - yield self.check_user_id_not_appservice_exclusive(user_id) - token = self.macaroon_gen.generate_access_token(user_id) - try: - yield self.store.register( - user_id=user_id, - token=token, - password_hash=None, - create_profile_with_localpart=user.localpart, - ) - except Exception as e: - yield self.store.add_access_token_to_user(user_id, token) - # Ignore Registration errors - logger.exception(e) - defer.returnValue((user_id, token)) - - @defer.inlineCallbacks def register_email(self, threepidCreds): """ Registers emails with an identity server. 
@@ -350,8 +380,7 @@ class RegistrationHandler(BaseHandler): logger.info("validating threepidcred sid %s on id server %s", c['sid'], c['idServer']) try: - identity_handler = self.hs.get_handlers().identity_handler - threepid = yield identity_handler.threepid_from_creds(c) + threepid = yield self.identity_handler.threepid_from_creds(c) except Exception: logger.exception("Couldn't validate 3pid") raise RegistrationError(400, "Couldn't validate 3pid") @@ -375,9 +404,8 @@ class RegistrationHandler(BaseHandler): # Now we have a matrix ID, bind it to the threepids we were given for c in threepidCreds: - identity_handler = self.hs.get_handlers().identity_handler # XXX: This should be a deferred list, shouldn't it? - yield identity_handler.bind_threepid(c, user_id) + yield self.identity_handler.bind_threepid(c, user_id) def check_user_id_not_appservice_exclusive(self, user_id, allowed_appservice=None): # don't allow people to register the server notices mxid @@ -432,7 +460,7 @@ class RegistrationHandler(BaseHandler): lines = response.split('\n') json = { "valid": lines[0] == 'true', - "error_url": "http://www.google.com/recaptcha/api/challenge?" + + "error_url": "http://www.recaptcha.net/recaptcha/api/challenge?" 
+ "error=%s" % lines[1] } defer.returnValue(json) @@ -443,7 +471,7 @@ class RegistrationHandler(BaseHandler): Used only by c/s api v1 """ data = yield self.captcha_client.post_urlencoded_get_raw( - "http://www.google.com:80/recaptcha/api/verify", + "http://www.recaptcha.net:80/recaptcha/api/verify", args={ 'privatekey': private_key, 'remoteip': ip_addr, @@ -485,11 +513,11 @@ class RegistrationHandler(BaseHandler): token = self.macaroon_gen.generate_access_token(user_id) if need_register: - yield self.store.register( + yield self._register_with_store( user_id=user_id, token=token, password_hash=password_hash, - create_profile_with_localpart=user.localpart, + create_profile_with_displayname=user.localpart, ) else: yield self._auth_handler.delete_access_tokens_for_user(user_id) @@ -503,9 +531,6 @@ class RegistrationHandler(BaseHandler): defer.returnValue((user_id, token)) - def auth_handler(self): - return self.hs.get_auth_handler() - @defer.inlineCallbacks def get_or_register_3pid_guest(self, medium, address, inviter_user_id): """Get a guest access token for a 3PID, creating a guest account if @@ -564,3 +589,275 @@ class RegistrationHandler(BaseHandler): action="join", ratelimit=False, ) + + def _register_with_store(self, user_id, token=None, password_hash=None, + was_guest=False, make_guest=False, appservice_id=None, + create_profile_with_displayname=None, admin=False, + user_type=None): + """Register user in the datastore. + + Args: + user_id (str): The desired user ID to register. + token (str): The desired access token to use for this user. If this + is not None, the given access token is associated with the user + id. + password_hash (str|None): Optional. The password hash for this user. + was_guest (bool): Optional. Whether this is a guest account being + upgraded to a non-guest account. + make_guest (boolean): True if the the new user should be guest, + false to add a regular user account. 
+ appservice_id (str|None): The ID of the appservice registering the user. + create_profile_with_displayname (unicode|None): Optionally create a + profile for the user, setting their displayname to the given value + admin (boolean): is an admin user? + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. + + Returns: + Deferred + """ + if self.hs.config.worker_app: + return self._register_client( + user_id=user_id, + token=token, + password_hash=password_hash, + was_guest=was_guest, + make_guest=make_guest, + appservice_id=appservice_id, + create_profile_with_displayname=create_profile_with_displayname, + admin=admin, + user_type=user_type, + ) + else: + return self.store.register( + user_id=user_id, + token=token, + password_hash=password_hash, + was_guest=was_guest, + make_guest=make_guest, + appservice_id=appservice_id, + create_profile_with_displayname=create_profile_with_displayname, + admin=admin, + user_type=user_type, + ) + + @defer.inlineCallbacks + def register_device(self, user_id, device_id, initial_display_name, + is_guest=False): + """Register a device for a user and generate an access token. + + Args: + user_id (str): full canonical @user:id + device_id (str|None): The device ID to check, or None to generate + a new one. + initial_display_name (str|None): An optional display name for the + device. 
+ is_guest (bool): Whether this is a guest account + + Returns: + defer.Deferred[tuple[str, str]]: Tuple of device ID and access token + """ + + if self.hs.config.worker_app: + r = yield self._register_device_client( + user_id=user_id, + device_id=device_id, + initial_display_name=initial_display_name, + is_guest=is_guest, + ) + defer.returnValue((r["device_id"], r["access_token"])) + else: + device_id = yield self.device_handler.check_device_registered( + user_id, device_id, initial_display_name + ) + if is_guest: + access_token = self.macaroon_gen.generate_access_token( + user_id, ["guest = true"] + ) + else: + access_token = yield self._auth_handler.get_access_token_for_user_id( + user_id, device_id=device_id, + ) + + defer.returnValue((device_id, access_token)) + + @defer.inlineCallbacks + def post_registration_actions(self, user_id, auth_result, access_token, + bind_email, bind_msisdn): + """A user has completed registration + + Args: + user_id (str): The user ID that consented + auth_result (dict): The authenticated credentials of the newly + registered user. + access_token (str|None): The access token of the newly logged in + device, or None if `inhibit_login` enabled. 
+ bind_email (bool): Whether to bind the email with the identity + server + bind_msisdn (bool): Whether to bind the msisdn with the identity + server + """ + if self.hs.config.worker_app: + yield self._post_registration_client( + user_id=user_id, + auth_result=auth_result, + access_token=access_token, + bind_email=bind_email, + bind_msisdn=bind_msisdn, + ) + return + + if auth_result and LoginType.EMAIL_IDENTITY in auth_result: + threepid = auth_result[LoginType.EMAIL_IDENTITY] + # Necessary due to auth checks prior to the threepid being + # written to the db + if is_threepid_reserved( + self.hs.config.mau_limits_reserved_threepids, threepid + ): + yield self.store.upsert_monthly_active_user(user_id) + + yield self._register_email_threepid( + user_id, threepid, access_token, + bind_email, + ) + + if auth_result and LoginType.MSISDN in auth_result: + threepid = auth_result[LoginType.MSISDN] + yield self._register_msisdn_threepid( + user_id, threepid, bind_msisdn, + ) + + if auth_result and LoginType.TERMS in auth_result: + yield self._on_user_consented( + user_id, self.hs.config.user_consent_version, + ) + + @defer.inlineCallbacks + def _on_user_consented(self, user_id, consent_version): + """A user consented to the terms on registration + + Args: + user_id (str): The user ID that consented + consent_version (str): version of the policy the user has + consented to. + """ + logger.info("%s has consented to the privacy policy", user_id) + yield self.store.user_set_consent_version( + user_id, consent_version, + ) + yield self.post_consent_actions(user_id) + + @defer.inlineCallbacks + def _register_email_threepid(self, user_id, threepid, token, bind_email): + """Add an email address as a 3pid identifier + + Also adds an email pusher for the email address, if configured in the + HS config + + Also optionally binds emails to the given user_id on the identity server + + Must be called on master. 
+ + Args: + user_id (str): id of user + threepid (object): m.login.email.identity auth response + token (str|None): access_token for the user, or None if not logged + in. + bind_email (bool): true if the client requested the email to be + bound at the identity server + Returns: + defer.Deferred: + """ + reqd = ('medium', 'address', 'validated_at') + if any(x not in threepid for x in reqd): + # This will only happen if the ID server returns a malformed response + logger.info("Can't add incomplete 3pid") + return + + yield self._auth_handler.add_threepid( + user_id, + threepid['medium'], + threepid['address'], + threepid['validated_at'], + ) + + # And we add an email pusher for them by default, but only + # if email notifications are enabled (so people don't start + # getting mail spam where they weren't before if email + # notifs are set up on a home server) + if (self.hs.config.email_enable_notifs and + self.hs.config.email_notif_for_new_users + and token): + # Pull the ID of the access token back out of the db + # It would really make more sense for this to be passed + # up when the access token is saved, but that's quite an + # invasive change I'd rather do separately. 
+ user_tuple = yield self.store.get_user_by_access_token( + token + ) + token_id = user_tuple["token_id"] + + yield self.pusher_pool.add_pusher( + user_id=user_id, + access_token=token_id, + kind="email", + app_id="m.email", + app_display_name="Email Notifications", + device_display_name=threepid["address"], + pushkey=threepid["address"], + lang=None, # We don't know a user's language here + data={}, + ) + + if bind_email: + logger.info("bind_email specified: binding") + logger.debug("Binding emails %s to %s" % ( + threepid, user_id + )) + yield self.identity_handler.bind_threepid( + threepid['threepid_creds'], user_id + ) + else: + logger.info("bind_email not specified: not binding email") + + @defer.inlineCallbacks + def _register_msisdn_threepid(self, user_id, threepid, bind_msisdn): + """Add a phone number as a 3pid identifier + + Also optionally binds msisdn to the given user_id on the identity server + + Must be called on master. + + Args: + user_id (str): id of user + threepid (object): m.login.msisdn auth response + token (str): access_token for the user + bind_email (bool): true if the client requested the email to be + bound at the identity server + Returns: + defer.Deferred: + """ + try: + assert_params_in_dict(threepid, ['medium', 'address', 'validated_at']) + except SynapseError as ex: + if ex.errcode == Codes.MISSING_PARAM: + # This will only happen if the ID server returns a malformed response + logger.info("Can't add incomplete 3pid") + defer.returnValue(None) + raise + + yield self._auth_handler.add_threepid( + user_id, + threepid['medium'], + threepid['address'], + threepid['validated_at'], + ) + + if bind_msisdn: + logger.info("bind_msisdn specified: binding") + logger.debug("Binding msisdn %s to %s", threepid, user_id) + yield self.identity_handler.bind_threepid( + threepid['threepid_creds'], user_id + ) + else: + logger.info("bind_msisdn not specified: not binding msisdn") diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 
3ba92bdb4c..67b15697fd 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -21,7 +21,7 @@ import math import string from collections import OrderedDict -from six import string_types +from six import iteritems, string_types from twisted.internet import defer @@ -32,10 +32,11 @@ from synapse.api.constants import ( JoinRules, RoomCreationPreset, ) -from synapse.api.errors import AuthError, Codes, StoreError, SynapseError +from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError from synapse.storage.state import StateFilter from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID from synapse.util import stringutils +from synapse.util.async_helpers import Linearizer from synapse.visibility import filter_events_for_client from ._base import BaseHandler @@ -73,6 +74,372 @@ class RoomCreationHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() + + # linearizer to stop two upgrades happening at once + self._upgrade_linearizer = Linearizer("room_upgrade_linearizer") + + @defer.inlineCallbacks + def upgrade_room(self, requester, old_room_id, new_version): + """Replace a room with a new room with a different version + + Args: + requester (synapse.types.Requester): the user requesting the upgrade + old_room_id (unicode): the id of the room to be replaced + new_version (unicode): the new room version to use + + Returns: + Deferred[unicode]: the new room id + """ + yield self.ratelimit(requester) + + user_id = requester.user.to_string() + + with (yield self._upgrade_linearizer.queue(old_room_id)): + # start by allocating a new room id + r = yield self.store.get_room(old_room_id) + if r is None: + raise NotFoundError("Unknown room id %s" % (old_room_id,)) + new_room_id = yield self._generate_room_id( + creator_id=user_id, is_public=r["is_public"], + ) + + 
logger.info("Creating new room %s to replace %s", new_room_id, old_room_id) + + # we create and auth the tombstone event before properly creating the new + # room, to check our user has perms in the old room. + tombstone_event, tombstone_context = ( + yield self.event_creation_handler.create_event( + requester, { + "type": EventTypes.Tombstone, + "state_key": "", + "room_id": old_room_id, + "sender": user_id, + "content": { + "body": "This room has been replaced", + "replacement_room": new_room_id, + } + }, + token_id=requester.access_token_id, + ) + ) + old_room_version = yield self.store.get_room_version(old_room_id) + yield self.auth.check_from_context( + old_room_version, tombstone_event, tombstone_context, + ) + + yield self.clone_existing_room( + requester, + old_room_id=old_room_id, + new_room_id=new_room_id, + new_room_version=new_version, + tombstone_event_id=tombstone_event.event_id, + ) + + # now send the tombstone + yield self.event_creation_handler.send_nonmember_event( + requester, tombstone_event, tombstone_context, + ) + + old_room_state = yield tombstone_context.get_current_state_ids(self.store) + + # update any aliases + yield self._move_aliases_to_new_room( + requester, old_room_id, new_room_id, old_room_state, + ) + + # and finally, shut down the PLs in the old room, and update them in the new + # room. 
+ yield self._update_upgraded_room_pls( + requester, old_room_id, new_room_id, old_room_state, + ) + + defer.returnValue(new_room_id) + + @defer.inlineCallbacks + def _update_upgraded_room_pls( + self, requester, old_room_id, new_room_id, old_room_state, + ): + """Send updated power levels in both rooms after an upgrade + + Args: + requester (synapse.types.Requester): the user requesting the upgrade + old_room_id (unicode): the id of the room to be replaced + new_room_id (unicode): the id of the replacement room + old_room_state (dict[tuple[str, str], str]): the state map for the old room + + Returns: + Deferred + """ + old_room_pl_event_id = old_room_state.get((EventTypes.PowerLevels, "")) + + if old_room_pl_event_id is None: + logger.warning( + "Not supported: upgrading a room with no PL event. Not setting PLs " + "in old room.", + ) + return + + old_room_pl_state = yield self.store.get_event(old_room_pl_event_id) + + # we try to stop regular users from speaking by setting the PL required + # to send regular events and invites to 'Moderator' level. That's normally + # 50, but if the default PL in a room is 50 or more, then we set the + # required PL above that. 
+ + pl_content = dict(old_room_pl_state.content) + users_default = int(pl_content.get("users_default", 0)) + restricted_level = max(users_default + 1, 50) + + updated = False + for v in ("invite", "events_default"): + current = int(pl_content.get(v, 0)) + if current < restricted_level: + logger.info( + "Setting level for %s in %s to %i (was %i)", + v, old_room_id, restricted_level, current, + ) + pl_content[v] = restricted_level + updated = True + else: + logger.info( + "Not setting level for %s (already %i)", + v, current, + ) + + if updated: + try: + yield self.event_creation_handler.create_and_send_nonmember_event( + requester, { + "type": EventTypes.PowerLevels, + "state_key": '', + "room_id": old_room_id, + "sender": requester.user.to_string(), + "content": pl_content, + }, ratelimit=False, + ) + except AuthError as e: + logger.warning("Unable to update PLs in old room: %s", e) + + logger.info("Setting correct PLs in new room") + yield self.event_creation_handler.create_and_send_nonmember_event( + requester, { + "type": EventTypes.PowerLevels, + "state_key": '', + "room_id": new_room_id, + "sender": requester.user.to_string(), + "content": old_room_pl_state.content, + }, ratelimit=False, + ) + + @defer.inlineCallbacks + def clone_existing_room( + self, requester, old_room_id, new_room_id, new_room_version, + tombstone_event_id, + ): + """Populate a new room based on an old room + + Args: + requester (synapse.types.Requester): the user requesting the upgrade + old_room_id (unicode): the id of the room to be replaced + new_room_id (unicode): the id to give the new room (should already have been + created with _gemerate_room_id()) + new_room_version (unicode): the new room version to use + tombstone_event_id (unicode|str): the ID of the tombstone event in the old + room. 
+ Returns: + Deferred[None] + """ + user_id = requester.user.to_string() + + if not self.spam_checker.user_may_create_room(user_id): + raise SynapseError(403, "You are not permitted to create rooms") + + creation_content = { + "room_version": new_room_version, + "predecessor": { + "room_id": old_room_id, + "event_id": tombstone_event_id, + } + } + + # Check if old room was non-federatable + + # Get old room's create event + old_room_create_event = yield self.store.get_create_event_for_room(old_room_id) + + # Check if the create event specified a non-federatable room + if not old_room_create_event.content.get("m.federate", True): + # If so, mark the new room as non-federatable as well + creation_content["m.federate"] = False + + initial_state = dict() + + # Replicate relevant room events + types_to_copy = ( + (EventTypes.JoinRules, ""), + (EventTypes.Name, ""), + (EventTypes.Topic, ""), + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.GuestAccess, ""), + (EventTypes.RoomAvatar, ""), + (EventTypes.Encryption, ""), + (EventTypes.ServerACL, ""), + ) + + old_room_state_ids = yield self.store.get_filtered_current_state_ids( + old_room_id, StateFilter.from_types(types_to_copy), + ) + # map from event_id to BaseEvent + old_room_state_events = yield self.store.get_events(old_room_state_ids.values()) + + for k, old_event_id in iteritems(old_room_state_ids): + old_event = old_room_state_events.get(old_event_id) + if old_event: + initial_state[k] = old_event.content + + yield self._send_events_for_new_room( + requester, + new_room_id, + + # we expect to override all the presets with initial_state, so this is + # somewhat arbitrary. 
+ preset_config=RoomCreationPreset.PRIVATE_CHAT, + + invite_list=[], + initial_state=initial_state, + creation_content=creation_content, + ) + + # Transfer membership events + old_room_member_state_ids = yield self.store.get_filtered_current_state_ids( + old_room_id, StateFilter.from_types([(EventTypes.Member, None)]), + ) + + # map from event_id to BaseEvent + old_room_member_state_events = yield self.store.get_events( + old_room_member_state_ids.values(), + ) + for k, old_event in iteritems(old_room_member_state_events): + # Only transfer ban events + if ("membership" in old_event.content and + old_event.content["membership"] == "ban"): + yield self.room_member_handler.update_membership( + requester, + UserID.from_string(old_event['state_key']), + new_room_id, + "ban", + ratelimit=False, + content=old_event.content, + ) + + # XXX invites/joins + # XXX 3pid invites + + @defer.inlineCallbacks + def _move_aliases_to_new_room( + self, requester, old_room_id, new_room_id, old_room_state, + ): + directory_handler = self.hs.get_handlers().directory_handler + + aliases = yield self.store.get_aliases_for_room(old_room_id) + + # check to see if we have a canonical alias. + canonical_alias = None + canonical_alias_event_id = old_room_state.get((EventTypes.CanonicalAlias, "")) + if canonical_alias_event_id: + canonical_alias_event = yield self.store.get_event(canonical_alias_event_id) + if canonical_alias_event: + canonical_alias = canonical_alias_event.content.get("alias", "") + + # first we try to remove the aliases from the old room (we suppress sending + # the room_aliases event until the end). + # + # Note that we'll only be able to remove aliases that (a) aren't owned by an AS, + # and (b) unless the user is a server admin, which the user created. 
+ # + # This is probably correct - given we don't allow such aliases to be deleted + # normally, it would be odd to allow it in the case of doing a room upgrade - + # but it makes the upgrade less effective, and you have to wonder why a room + # admin can't remove aliases that point to that room anyway. + # (cf https://github.com/matrix-org/synapse/issues/2360) + # + removed_aliases = [] + for alias_str in aliases: + alias = RoomAlias.from_string(alias_str) + try: + yield directory_handler.delete_association( + requester, alias, send_event=False, + ) + removed_aliases.append(alias_str) + except SynapseError as e: + logger.warning( + "Unable to remove alias %s from old room: %s", + alias, e, + ) + + # if we didn't find any aliases, or couldn't remove anyway, we can skip the rest + # of this. + if not removed_aliases: + return + + try: + # this can fail if, for some reason, our user doesn't have perms to send + # m.room.aliases events in the old room (note that we've already checked that + # they have perms to send a tombstone event, so that's not terribly likely). + # + # If that happens, it's regrettable, but we should carry on: it's the same + # as when you remove an alias from the directory normally - it just means that + # the aliases event gets out of sync with the directory + # (cf https://github.com/vector-im/riot-web/issues/2369) + yield directory_handler.send_room_alias_update_event( + requester, old_room_id, + ) + except AuthError as e: + logger.warning( + "Failed to send updated alias event on old room: %s", e, + ) + + # we can now add any aliases we successfully removed to the new room. 
+ for alias in removed_aliases: + try: + yield directory_handler.create_association( + requester, RoomAlias.from_string(alias), + new_room_id, servers=(self.hs.hostname, ), + send_event=False, + ) + logger.info("Moved alias %s to new room", alias) + except SynapseError as e: + # I'm not really expecting this to happen, but it could if the spam + # checking module decides it shouldn't, or similar. + logger.error( + "Error adding alias %s to new room: %s", + alias, e, + ) + + try: + if canonical_alias and (canonical_alias in removed_aliases): + yield self.event_creation_handler.create_and_send_nonmember_event( + requester, + { + "type": EventTypes.CanonicalAlias, + "state_key": "", + "room_id": new_room_id, + "sender": requester.user.to_string(), + "content": {"alias": canonical_alias, }, + }, + ratelimit=False + ) + + yield directory_handler.send_room_alias_update_event( + requester, new_room_id, + ) + except SynapseError as e: + # again I'm not really expecting this to fail, but if it does, I'd rather + # we returned the new room to the client at this point. + logger.error( + "Unable to send updated alias events in new room: %s", e, + ) @defer.inlineCallbacks def create_room(self, requester, config, ratelimit=True, @@ -104,7 +471,7 @@ class RoomCreationHandler(BaseHandler): """ user_id = requester.user.to_string() - self.auth.check_auth_blocking(user_id) + yield self.auth.check_auth_blocking(user_id) if not self.spam_checker.user_may_create_room(user_id): raise SynapseError(403, "You are not permitted to create rooms") @@ -165,28 +532,7 @@ class RoomCreationHandler(BaseHandler): visibility = config.get("visibility", None) is_public = visibility == "public" - # autogen room IDs and try to create it. We may clash, so just - # try a few times till one goes through, giving up eventually. 
- attempts = 0 - room_id = None - while attempts < 5: - try: - random_string = stringutils.random_string(18) - gen_room_id = RoomID( - random_string, - self.hs.hostname, - ) - yield self.store.store_room( - room_id=gen_room_id.to_string(), - room_creator_user_id=user_id, - is_public=is_public - ) - room_id = gen_room_id.to_string() - break - except StoreError: - attempts += 1 - if not room_id: - raise StoreError(500, "Couldn't generate a room ID.") + room_id = yield self._generate_room_id(creator_id=user_id, is_public=is_public) if room_alias: directory_handler = self.hs.get_handlers().directory_handler @@ -216,18 +562,15 @@ class RoomCreationHandler(BaseHandler): # override any attempt to set room versions via the creation_content creation_content["room_version"] = room_version - room_member_handler = self.hs.get_room_member_handler() - yield self._send_events_for_new_room( requester, room_id, - room_member_handler, preset_config=preset_config, invite_list=invite_list, initial_state=initial_state, creation_content=creation_content, room_alias=room_alias, - power_level_content_override=config.get("power_level_content_override", {}), + power_level_content_override=config.get("power_level_content_override"), creator_join_profile=creator_join_profile, ) @@ -263,7 +606,7 @@ class RoomCreationHandler(BaseHandler): if is_direct: content["is_direct"] = is_direct - yield room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester, UserID.from_string(invitee), room_id, @@ -301,14 +644,13 @@ class RoomCreationHandler(BaseHandler): self, creator, # A Requester object. 
room_id, - room_member_handler, preset_config, invite_list, initial_state, creation_content, - room_alias, - power_level_content_override, - creator_join_profile, + room_alias=None, + power_level_content_override=None, + creator_join_profile=None, ): def create(etype, content, **kwargs): e = { @@ -324,6 +666,7 @@ class RoomCreationHandler(BaseHandler): @defer.inlineCallbacks def send(etype, content, **kwargs): event = create(etype, content, **kwargs) + logger.info("Sending %s in new room", etype) yield self.event_creation_handler.create_and_send_nonmember_event( creator, event, @@ -346,7 +689,8 @@ class RoomCreationHandler(BaseHandler): content=creation_content, ) - yield room_member_handler.update_membership( + logger.info("Sending %s in new room", EventTypes.Member) + yield self.room_member_handler.update_membership( creator, creator.user, room_id, @@ -388,7 +732,8 @@ class RoomCreationHandler(BaseHandler): for invitee in invite_list: power_level_content["users"][invitee] = 100 - power_level_content.update(power_level_content_override) + if power_level_content_override: + power_level_content.update(power_level_content_override) yield send( etype=EventTypes.PowerLevels, @@ -427,6 +772,30 @@ class RoomCreationHandler(BaseHandler): content=content, ) + @defer.inlineCallbacks + def _generate_room_id(self, creator_id, is_public): + # autogen room IDs and try to create it. We may clash, so just + # try a few times till one goes through, giving up eventually. 
+ attempts = 0 + while attempts < 5: + try: + random_string = stringutils.random_string(18) + gen_room_id = RoomID( + random_string, + self.hs.hostname, + ).to_string() + if isinstance(gen_room_id, bytes): + gen_room_id = gen_room_id.decode('utf-8') + yield self.store.store_room( + room_id=gen_room_id, + room_creator_user_id=creator_id, + is_public=is_public, + ) + defer.returnValue(gen_room_id) + except StoreError: + attempts += 1 + raise StoreError(500, "Couldn't generate a room ID.") + class RoomContextHandler(object): def __init__(self, hs): diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 4f51a464e7..afa508d729 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -74,8 +74,14 @@ class RoomListHandler(BaseHandler): # We explicitly don't bother caching searches or requests for # appservice specific lists. logger.info("Bypassing cache as search request.") + + # XXX: Quick hack to stop room directory queries taking too long. + # Timeout request after 60s. Probably want a more fundamental + # solution at some point + timeout = self.clock.time() + 60 return self._get_public_room_list( - limit, since_token, search_filter, network_tuple=network_tuple, + limit, since_token, search_filter, + network_tuple=network_tuple, timeout=timeout, ) key = (limit, since_token, network_tuple) @@ -90,7 +96,22 @@ class RoomListHandler(BaseHandler): def _get_public_room_list(self, limit=None, since_token=None, search_filter=None, network_tuple=EMPTY_THIRD_PARTY_ID, - from_federation=False,): + from_federation=False, + timeout=None,): + """Generate a public room list. + Args: + limit (int|None): Maximum amount of rooms to return. + since_token (str|None) + search_filter (dict|None): Dictionary to filter rooms by. + network_tuple (ThirdPartyInstanceID): Which public list to use. + This can be (None, None) to indicate the main list, or a particular + appservice and network id to use an appservice specific one. 
+ Setting to None returns all public rooms across all lists. + from_federation (bool): Whether this request originated from a + federating server or a client. Used for room filtering. + timeout (int|None): Amount of seconds to wait for a response before + timing out. + """ if since_token and since_token != "END": since_token = RoomListNextBatch.from_token(since_token) else: @@ -205,6 +226,9 @@ class RoomListHandler(BaseHandler): chunk = [] for i in range(0, len(rooms_to_scan), step): + if timeout and self.clock.time() > timeout: + raise Exception("Timed out searching room directory") + batch = rooms_to_scan[i:i + step] logger.info("Processing %i rooms for result", len(batch)) yield concurrently_execute( diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 07fd3e82fc..190ea2c7b1 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -61,9 +61,9 @@ class RoomMemberHandler(object): self.federation_handler = hs.get_handlers().federation_handler self.directory_handler = hs.get_handlers().directory_handler - self.registration_handler = hs.get_handlers().registration_handler + self.registration_handler = hs.get_registration_handler() self.profile_handler = hs.get_profile_handler() - self.event_creation_hander = hs.get_event_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.member_linearizer = Linearizer(name="member") @@ -161,6 +161,8 @@ class RoomMemberHandler(object): ratelimit=True, content=None, ): + user_id = target.to_string() + if content is None: content = {} @@ -168,14 +170,14 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - event, context = yield self.event_creation_hander.create_event( + event, context = yield self.event_creation_handler.create_event( requester, { "type": EventTypes.Member, "content": content, "room_id": room_id, "sender": requester.user.to_string(), - "state_key": target.to_string(), + "state_key": user_id, 
# For backwards compatibility: "membership": membership, @@ -186,14 +188,14 @@ class RoomMemberHandler(object): ) # Check if this event matches the previous membership event for the user. - duplicate = yield self.event_creation_hander.deduplicate_state_event( + duplicate = yield self.event_creation_handler.deduplicate_state_event( event, context, ) if duplicate is not None: # Discard the new event since this membership change is a no-op. defer.returnValue(duplicate) - yield self.event_creation_hander.handle_new_client_event( + yield self.event_creation_handler.handle_new_client_event( requester, event, context, @@ -204,12 +206,12 @@ class RoomMemberHandler(object): prev_state_ids = yield context.get_prev_state_ids(self.store) prev_member_event_id = prev_state_ids.get( - (EventTypes.Member, target.to_string()), + (EventTypes.Member, user_id), None ) if event.membership == Membership.JOIN: - # Only fire user_joined_room if the user has acutally joined the + # Only fire user_joined_room if the user has actually joined the # room. Don't bother if the user is just changing their profile # info. newly_joined = True @@ -218,6 +220,18 @@ class RoomMemberHandler(object): newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: yield self._user_joined_room(target, room_id) + + # Copy over direct message status and room tags if this is a join + # on an upgraded room + + # Check if this is an upgraded room + predecessor = yield self.store.get_room_predecessor(room_id) + + if predecessor: + # It is an upgraded room. 
Copy over old tags + self.copy_room_tags_and_direct_to_room( + predecessor["room_id"], room_id, user_id, + ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) @@ -227,6 +241,55 @@ class RoomMemberHandler(object): defer.returnValue(event) @defer.inlineCallbacks + def copy_room_tags_and_direct_to_room( + self, + old_room_id, + new_room_id, + user_id, + ): + """Copies the tags and direct room state from one room to another. + + Args: + old_room_id (str) + new_room_id (str) + user_id (str) + + Returns: + Deferred[None] + """ + # Retrieve user account data for predecessor room + user_account_data, _ = yield self.store.get_account_data_for_user( + user_id, + ) + + # Copy direct message state if applicable + direct_rooms = user_account_data.get("m.direct", {}) + + # Check which key this room is under + if isinstance(direct_rooms, dict): + for key, room_id_list in direct_rooms.items(): + if old_room_id in room_id_list and new_room_id not in room_id_list: + # Add new room_id to this key + direct_rooms[key].append(new_room_id) + + # Save back to user's m.direct account data + yield self.store.add_account_data_for_user( + user_id, "m.direct", direct_rooms, + ) + break + + # Copy room tags if applicable + room_tags = yield self.store.get_tags_for_room( + user_id, old_room_id, + ) + + # Copy each room tag to the new room + for tag, tag_content in room_tags.items(): + yield self.store.add_tag_to_room( + user_id, new_room_id, tag, tag_content + ) + + @defer.inlineCallbacks def update_membership( self, requester, @@ -493,7 +556,7 @@ class RoomMemberHandler(object): else: requester = synapse.types.create_requester(target_user) - prev_event = yield self.event_creation_hander.deduplicate_state_event( + prev_event = yield self.event_creation_handler.deduplicate_state_event( event, context, ) if prev_event is not None: @@ -513,7 +576,7 @@ class RoomMemberHandler(object): if is_blocked: raise 
SynapseError(403, "This room has been blocked on this server") - yield self.event_creation_hander.handle_new_client_event( + yield self.event_creation_handler.handle_new_client_event( requester, event, context, @@ -527,7 +590,7 @@ class RoomMemberHandler(object): ) if event.membership == Membership.JOIN: - # Only fire user_joined_room if the user has acutally joined the + # Only fire user_joined_room if the user has actually joined the # room. Don't bother if the user is just changing their profile # info. newly_joined = True @@ -755,7 +818,7 @@ class RoomMemberHandler(object): ) ) - yield self.event_creation_hander.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.ThirdPartyInvite, @@ -877,7 +940,8 @@ class RoomMemberHandler(object): # first member event? create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.hs.is_mine_id(create_event_id)) + # We can only get here if we're in the process of creating the room + defer.returnValue(True) for etype, state_key in current_state_ids: if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 80e7b15de8..49c439313e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -38,6 +38,41 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks + def get_old_rooms_from_upgraded_room(self, room_id): + """Retrieves room IDs of old rooms in the history of an upgraded room. + + We do so by checking the m.room.create event of the room for a + `predecessor` key. If it exists, we add the room ID to our return + list and then check that room for a m.room.create event and so on + until we can no longer find any more previous rooms. + + The full list of all found rooms in then returned. 
+ + Args: + room_id (str): id of the room to search through. + + Returns: + Deferred[iterable[unicode]]: predecessor room ids + """ + + historical_room_ids = [] + + while True: + predecessor = yield self.store.get_room_predecessor(room_id) + + # If no predecessor, assume we've hit a dead end + if not predecessor: + break + + # Add predecessor's room ID + historical_room_ids.append(predecessor["room_id"]) + + # Scan through the old room for further predecessors + room_id = predecessor["room_id"] + + defer.returnValue(historical_room_ids) + + @defer.inlineCallbacks def search(self, user, content, batch=None): """Performs a full text search for a user. @@ -50,6 +85,9 @@ class SearchHandler(BaseHandler): dict to be returned to the client with results of search """ + if not self.hs.config.enable_search: + raise SynapseError(400, "Search is disabled on this homeserver") + batch_group = None batch_group_key = None batch_token = None @@ -134,6 +172,18 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) + # If doing a subset of all rooms seearch, check if any of the rooms + # are from an upgraded room, and search their contents as well + if search_filter.rooms: + historical_room_ids = [] + for room_id in search_filter.rooms: + # Add any previous rooms to the search if they exist + ids = yield self.get_old_rooms_from_upgraded_room(room_id) + historical_room_ids += ids + + # Prevent any historical events from being filtered + search_filter = search_filter.with_room_ids(historical_room_ids) + room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 09739f2862..bd97241ab4 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -895,14 +895,17 @@ class SyncHandler(object): Returns: Deferred(SyncResult) """ - logger.info("Calculating sync response for %r", sync_config.user) - # NB: The now_token gets changed by some of the generate_sync_* methods, 
# this is due to some of the underlying streams not supporting the ability # to query up to a given point. # Always use the `now_token` in `SyncResultBuilder` now_token = yield self.event_sources.get_current_token() + logger.info( + "Calculating sync response for %r between %s and %s", + sync_config.user, since_token, now_token, + ) + user_id = sync_config.user.to_string() app_service = self.store.get_app_service_by_user_id(user_id) if app_service: @@ -1390,6 +1393,12 @@ class SyncHandler(object): room_entries = [] invited = [] for room_id, events in iteritems(mem_change_events_by_room_id): + logger.info( + "Membership changes in %s: [%s]", + room_id, + ", ".join(("%s (%s)" % (e.event_id, e.membership) for e in events)), + ) + non_joins = [e for e in events if e.membership != Membership.JOIN] has_join = len(non_joins) != len(events) @@ -1473,10 +1482,22 @@ class SyncHandler(object): if since_token and since_token.is_after(leave_token): continue + # If this is an out of band message, like a remote invite + # rejection, we include it in the recents batch. Otherwise, we + # let _load_filtered_recents handle fetching the correct + # batches. + # + # This is all screaming out for a refactor, as the logic here is + # subtle and the moving parts numerous. 
+ if leave_event.internal_metadata.is_out_of_band_membership(): + batch_events = [leave_event] + else: + batch_events = None + room_entries.append(RoomSyncResultBuilder( room_id=room_id, rtype="archived", - events=None, + events=batch_events, newly_joined=room_id in newly_joined_rooms, full_state=False, since_token=since_token, @@ -1668,13 +1689,17 @@ class SyncHandler(object): "content": content, }) - account_data = sync_config.filter_collection.filter_room_account_data( + account_data_events = sync_config.filter_collection.filter_room_account_data( account_data_events ) ephemeral = sync_config.filter_collection.filter_room_ephemeral(ephemeral) - if not (always_include or batch or account_data or ephemeral or full_state): + if not (always_include + or batch + or account_data_events + or ephemeral + or full_state): return state = yield self.compute_state_delta( @@ -1745,7 +1770,7 @@ class SyncHandler(object): room_id=room_id, timeline=batch, state=state, - account_data=account_data, + account_data=account_data_events, ) if room_sync or always_include: sync_result_builder.archived.append(room_sync) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index c610933dd4..a61bbf9392 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -63,11 +63,8 @@ class TypingHandler(object): self._member_typing_until = {} # clock time we expect to stop self._member_last_federation_poke = {} - # map room IDs to serial numbers - self._room_serials = {} self._latest_room_serial = 0 - # map room IDs to sets of users currently typing - self._room_typing = {} + self._reset() # caches which room_ids changed at which serials self._typing_stream_change_cache = StreamChangeCache( @@ -79,6 +76,15 @@ class TypingHandler(object): 5000, ) + def _reset(self): + """ + Reset the typing handler's data caches. 
+ """ + # map room IDs to serial numbers + self._room_serials = {} + # map room IDs to sets of users currently typing + self._room_typing = {} + def _handle_timeouts(self): logger.info("Checking for typing timeouts") diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index f11b430126..283c6c1b81 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -19,6 +19,7 @@ from six import iteritems from twisted.internet import defer +import synapse.metrics from synapse.api.constants import EventTypes, JoinRules, Membership from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.roommember import ProfileInfo @@ -125,9 +126,12 @@ class UserDirectoryHandler(object): """ # FIXME(#3714): We should probably do this in the same worker as all # the other changes. - yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None, - ) + is_support = yield self.store.is_support_user(user_id) + # Support users are for diagnostics and should not appear in the user directory. + if not is_support: + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url, None + ) @defer.inlineCallbacks def handle_user_deactivated(self, user_id): @@ -160,6 +164,12 @@ class UserDirectoryHandler(object): yield self._handle_deltas(deltas) self.pos = deltas[-1]["stream_id"] + + # Expose current event processing position to prometheus + synapse.metrics.event_processing_positions.labels("user_dir").set( + self.pos + ) + yield self.store.update_user_directory_stream_pos(self.pos) @defer.inlineCallbacks @@ -182,21 +192,25 @@ class UserDirectoryHandler(object): logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) yield self._handle_initial_room(room_id) num_processed_rooms += 1 - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) 
+ yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) logger.info("Processed all rooms.") if self.search_all_users: num_processed_users = 0 user_ids = yield self.store.get_all_local_users() - logger.info("Doing initial update of user directory. %d users", len(user_ids)) + logger.info( + "Doing initial update of user directory. %d users", len(user_ids) + ) for user_id in user_ids: # We add profiles for all users even if they don't match the # include pattern, just in case we want to change it in future - logger.info("Handling user %d/%d", num_processed_users + 1, len(user_ids)) + logger.info( + "Handling user %d/%d", num_processed_users + 1, len(user_ids) + ) yield self._handle_local_user(user_id) num_processed_users += 1 - yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0) logger.info("Processed all users") @@ -215,24 +229,24 @@ class UserDirectoryHandler(object): if not is_in_room: return - is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) users_with_profile = yield self.state.get_current_user_in_room(room_id) user_ids = set(users_with_profile) unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - room_id, { - user_id: users_with_profile[user_id] for user_id in unhandled_users - } + room_id, + {user_id: users_with_profile[user_id] for user_id in unhandled_users}, ) self.initially_handled_users |= unhandled_users if is_public: yield self.store.add_users_to_public_room( - room_id, - user_ids=user_ids - self.initially_handled_users_in_public + room_id, user_ids=user_ids - self.initially_handled_users_in_public ) self.initially_handled_users_in_public |= user_ids @@ -244,7 +258,7 @@ class UserDirectoryHandler(object): count = 0 for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield 
self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) if not self.is_mine_id(user_id): count += 1 @@ -259,7 +273,7 @@ class UserDirectoryHandler(object): continue if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) count += 1 user_set = (user_id, other_user_id) @@ -281,25 +295,23 @@ class UserDirectoryHandler(object): if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert, + room_id, not is_public, to_insert ) to_insert.clear() if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.update_users_who_share_room( - room_id, not is_public, to_update, + room_id, not is_public, to_update ) to_update.clear() if to_insert: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert, - ) + yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) to_insert.clear() if to_update: yield self.store.update_users_who_share_room( - room_id, not is_public, to_update, + room_id, not is_public, to_update ) to_update.clear() @@ -320,50 +332,55 @@ class UserDirectoryHandler(object): # may have become public or not and add/remove the users in said room if typ in (EventTypes.RoomHistoryVisibility, EventTypes.JoinRules): yield self._handle_room_publicity_change( - room_id, prev_event_id, event_id, typ, + room_id, prev_event_id, event_id, typ ) elif typ == EventTypes.Member: change = yield self._get_key_change( - prev_event_id, event_id, + prev_event_id, + event_id, key_name="membership", public_value=Membership.JOIN, ) - if change is None: - # Handle any profile changes - yield self._handle_profile_change( - state_key, room_id, prev_event_id, event_id, - ) - continue - - if not change: + if change is False: # Need to check if the server left the room entirely, if so # we might need to 
remove all the users in that room is_in_room = yield self.store.is_host_joined( - room_id, self.server_name, + room_id, self.server_name ) if not is_in_room: logger.info("Server left room: %r", room_id) # Fetch all the users that we marked as being in user # directory due to being in the room and then check if # need to remove those users or not - user_ids = yield self.store.get_users_in_dir_due_to_room(room_id) + user_ids = yield self.store.get_users_in_dir_due_to_room( + room_id + ) for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return else: logger.debug("Server is still in room: %r", room_id) - if change: # The user joined - event = yield self.store.get_event(event_id, allow_none=True) - profile = ProfileInfo( - avatar_url=event.content.get("avatar_url"), - display_name=event.content.get("displayname"), - ) + is_support = yield self.store.is_support_user(state_key) + if not is_support: + if change is None: + # Handle any profile changes + yield self._handle_profile_change( + state_key, room_id, prev_event_id, event_id + ) + continue + + if change: # The user joined + event = yield self.store.get_event(event_id, allow_none=True) + profile = ProfileInfo( + avatar_url=event.content.get("avatar_url"), + display_name=event.content.get("displayname"), + ) - yield self._handle_new_user(room_id, state_key, profile) - else: # The user left - yield self._handle_remove_user(room_id, state_key) + yield self._handle_new_user(room_id, state_key, profile) + else: # The user left + yield self._handle_remove_user(room_id, state_key) else: logger.debug("Ignoring irrelevant type: %r", typ) @@ -382,13 +399,15 @@ class UserDirectoryHandler(object): if typ == EventTypes.RoomHistoryVisibility: change = yield self._get_key_change( - prev_event_id, event_id, + prev_event_id, + event_id, key_name="history_visibility", public_value="world_readable", ) elif typ == EventTypes.JoinRules: change = yield self._get_key_change( - prev_event_id, event_id, + 
prev_event_id, + event_id, key_name="join_rule", public_value=JoinRules.PUBLIC, ) @@ -513,7 +532,7 @@ class UserDirectoryHandler(object): ) if self.is_mine_id(other_user_id) and not is_appservice: shared_is_private = yield self.store.get_if_users_share_a_room( - other_user_id, user_id, + other_user_id, user_id ) if shared_is_private is True: # We've already marked in the database they share a private room @@ -528,13 +547,11 @@ class UserDirectoryHandler(object): to_insert.add((other_user_id, user_id)) if to_insert: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert, - ) + yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) if to_update: yield self.store.update_users_who_share_room( - room_id, not is_public, to_update, + room_id, not is_public, to_update ) @defer.inlineCallbacks @@ -553,15 +570,15 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_public_room(user_id) update_user_in_public = row and row["room_id"] == room_id - if (update_user_in_public or update_user_dir): + if update_user_in_public or update_user_dir: # XXX: Make this faster? 
rooms = yield self.store.get_rooms_for_user(user_id) for j_room_id in rooms: - if (not update_user_in_public and not update_user_dir): + if not update_user_in_public and not update_user_dir: break is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name, + j_room_id, self.server_name ) if not is_in_room: @@ -589,19 +606,19 @@ class UserDirectoryHandler(object): # Get a list of user tuples that were in the DB due to this room and # users (this includes tuples where the other user matches `user_id`) user_tuples = yield self.store.get_users_in_share_dir_with_room_id( - user_id, room_id, + user_id, room_id ) for user_id, other_user_id in user_tuples: # For each user tuple get a list of rooms that they still share, # trying to find a private room, and update the entry in the DB - rooms = yield self.store.get_rooms_in_common_for_users(user_id, other_user_id) + rooms = yield self.store.get_rooms_in_common_for_users( + user_id, other_user_id + ) # If they dont share a room anymore, remove the mapping if not rooms: - yield self.store.remove_user_who_share_room( - user_id, other_user_id, - ) + yield self.store.remove_user_who_share_room(user_id, other_user_id) continue found_public_share = None @@ -615,13 +632,13 @@ class UserDirectoryHandler(object): else: found_public_share = None yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)], + room_id, not is_public, [(user_id, other_user_id)] ) break if found_public_share: yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)], + room_id, not is_public, [(user_id, other_user_id)] ) @defer.inlineCallbacks @@ -649,7 +666,7 @@ class UserDirectoryHandler(object): if prev_name != new_name or prev_avatar != new_avatar: yield self.store.update_profile_in_user_dir( - user_id, new_name, new_avatar, room_id, + user_id, new_name, new_avatar, room_id ) @defer.inlineCallbacks diff --git a/synapse/http/__init__.py 
b/synapse/http/__init__.py index a3f9e4f67c..d36bcd6336 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -15,8 +15,10 @@ # limitations under the License. import re +from twisted.internet import task from twisted.internet.defer import CancelledError from twisted.python import failure +from twisted.web.client import FileBodyProducer from synapse.api.errors import SynapseError @@ -47,3 +49,16 @@ def redact_uri(uri): r'\1<redacted>\3', uri ) + + +class QuieterFileBodyProducer(FileBodyProducer): + """Wrapper for FileBodyProducer that avoids CRITICAL errors when the connection drops. + + Workaround for https://github.com/matrix-org/synapse/issues/4003 / + https://twistedmatrix.com/trac/ticket/6528 + """ + def stopProducing(self): + try: + FileBodyProducer.stopProducing(self) + except task.TaskStopped: + pass diff --git a/synapse/http/client.py b/synapse/http/client.py index 3d05f83b8c..ad454f4964 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -15,34 +15,36 @@ # limitations under the License. 
import logging +from io import BytesIO from six import text_type from six.moves import urllib import treq from canonicaljson import encode_canonical_json, json +from netaddr import IPAddress from prometheus_client import Counter +from zope.interface import implementer, provider from OpenSSL import SSL from OpenSSL.SSL import VERIFY_NONE -from twisted.internet import defer, protocol, reactor, ssl -from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS -from twisted.web._newclient import ResponseDone -from twisted.web.client import ( - Agent, - BrowserLikeRedirectAgent, - ContentDecoderAgent, - GzipDecoder, - HTTPConnectionPool, - PartialDownloadError, - readBody, +from twisted.internet import defer, protocol, ssl +from twisted.internet.interfaces import ( + IReactorPluggableNameResolver, + IResolutionReceiver, ) +from twisted.python.failure import Failure +from twisted.web._newclient import ResponseDone +from twisted.web.client import Agent, HTTPConnectionPool, PartialDownloadError, readBody from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers from synapse.api.errors import Codes, HttpResponseException, SynapseError -from synapse.http import cancelled_to_request_timed_out_error, redact_uri -from synapse.http.endpoint import SpiderEndpoint +from synapse.http import ( + QuieterFileBodyProducer, + cancelled_to_request_timed_out_error, + redact_uri, +) from synapse.util.async_helpers import timeout_deferred from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable @@ -50,8 +52,125 @@ from synapse.util.logcontext import make_deferred_yieldable logger = logging.getLogger(__name__) outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"]) -incoming_responses_counter = Counter("synapse_http_client_responses", "", - ["method", "code"]) +incoming_responses_counter = Counter( + "synapse_http_client_responses", "", ["method", "code"] +) + + +def 
check_against_blacklist(ip_address, ip_whitelist, ip_blacklist): + """ + Args: + ip_address (netaddr.IPAddress) + ip_whitelist (netaddr.IPSet) + ip_blacklist (netaddr.IPSet) + """ + if ip_address in ip_blacklist: + if ip_whitelist is None or ip_address not in ip_whitelist: + return True + return False + + +class IPBlacklistingResolver(object): + """ + A proxy for reactor.nameResolver which only produces non-blacklisted IP + addresses, preventing DNS rebinding attacks on URL preview. + """ + + def __init__(self, reactor, ip_whitelist, ip_blacklist): + """ + Args: + reactor (twisted.internet.reactor) + ip_whitelist (netaddr.IPSet) + ip_blacklist (netaddr.IPSet) + """ + self._reactor = reactor + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + + def resolveHostName(self, recv, hostname, portNumber=0): + + r = recv() + d = defer.Deferred() + addresses = [] + + @provider(IResolutionReceiver) + class EndpointReceiver(object): + @staticmethod + def resolutionBegan(resolutionInProgress): + pass + + @staticmethod + def addressResolved(address): + ip_address = IPAddress(address.host) + + if check_against_blacklist( + ip_address, self._ip_whitelist, self._ip_blacklist + ): + logger.info( + "Dropped %s from DNS resolution to %s" % (ip_address, hostname) + ) + raise SynapseError(403, "IP address blocked by IP blacklist entry") + + addresses.append(address) + + @staticmethod + def resolutionComplete(): + d.callback(addresses) + + self._reactor.nameResolver.resolveHostName( + EndpointReceiver, hostname, portNumber=portNumber + ) + + def _callback(addrs): + r.resolutionBegan(None) + for i in addrs: + r.addressResolved(i) + r.resolutionComplete() + + d.addCallback(_callback) + + return r + + +class BlacklistingAgentWrapper(Agent): + """ + An Agent wrapper which will prevent access to IP addresses being accessed + directly (without an IP address lookup). 
+ """ + + def __init__(self, agent, reactor, ip_whitelist=None, ip_blacklist=None): + """ + Args: + agent (twisted.web.client.Agent): The Agent to wrap. + reactor (twisted.internet.reactor) + ip_whitelist (netaddr.IPSet) + ip_blacklist (netaddr.IPSet) + """ + self._agent = agent + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + + def request(self, method, uri, headers=None, bodyProducer=None): + h = urllib.parse.urlparse(uri.decode('ascii')) + + try: + ip_address = IPAddress(h.hostname) + + if check_against_blacklist( + ip_address, self._ip_whitelist, self._ip_blacklist + ): + logger.info( + "Blocking access to %s because of blacklist" % (ip_address,) + ) + e = SynapseError(403, "IP address blocked by IP blacklist entry") + return defer.fail(Failure(e)) + except Exception: + # Not an IP + pass + + return self._agent.request( + method, uri, headers=headers, bodyProducer=bodyProducer + ) class SimpleHttpClient(object): @@ -59,14 +178,54 @@ class SimpleHttpClient(object): A simple, no-frills HTTP client with methods that wrap up common ways of using HTTP in Matrix """ - def __init__(self, hs): + + def __init__(self, hs, treq_args={}, ip_whitelist=None, ip_blacklist=None): + """ + Args: + hs (synapse.server.HomeServer) + treq_args (dict): Extra keyword arguments to be given to treq.request. + ip_blacklist (netaddr.IPSet): The IP addresses that are blacklisted that + we may not request. + ip_whitelist (netaddr.IPSet): The whitelisted IP addresses, that we can + request if it were otherwise caught in a blacklist. 
+ """ self.hs = hs - pool = HTTPConnectionPool(reactor) + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + self._extra_treq_args = treq_args + + self.user_agent = hs.version_string + self.clock = hs.get_clock() + if hs.config.user_agent_suffix: + self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix) + + self.user_agent = self.user_agent.encode('ascii') + + if self._ip_blacklist: + real_reactor = hs.get_reactor() + # If we have an IP blacklist, we need to use a DNS resolver which + # filters out blacklisted IP addresses, to prevent DNS rebinding. + nameResolver = IPBlacklistingResolver( + real_reactor, self._ip_whitelist, self._ip_blacklist + ) + + @implementer(IReactorPluggableNameResolver) + class Reactor(object): + def __getattr__(_self, attr): + if attr == "nameResolver": + return nameResolver + else: + return getattr(real_reactor, attr) + + self.reactor = Reactor() + else: + self.reactor = hs.get_reactor() # the pusher makes lots of concurrent SSL connections to sygnal, and - # tends to do so in batches, so we need to allow the pool to keep lots - # of idle connections around. + # tends to do so in batches, so we need to allow the pool to keep + # lots of idle connections around. 
+ pool = HTTPConnectionPool(self.reactor) pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) pool.cachedConnectionTimeout = 2 * 60 @@ -74,20 +233,35 @@ class SimpleHttpClient(object): # BrowserLikePolicyForHTTPS which will do regular cert validation # 'like a browser' self.agent = Agent( - reactor, + self.reactor, connectTimeout=15, - contextFactory=hs.get_http_client_context_factory(), + contextFactory=self.hs.get_http_client_context_factory(), pool=pool, ) - self.user_agent = hs.version_string - self.clock = hs.get_clock() - if hs.config.user_agent_suffix: - self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,) - self.user_agent = self.user_agent.encode('ascii') + if self._ip_blacklist: + # If we have an IP blacklist, we then install the blacklisting Agent + # which prevents direct access to IP addresses, that are not caught + # by the DNS resolution. + self.agent = BlacklistingAgentWrapper( + self.agent, + self.reactor, + ip_whitelist=self._ip_whitelist, + ip_blacklist=self._ip_blacklist, + ) @defer.inlineCallbacks - def request(self, method, uri, data=b'', headers=None): + def request(self, method, uri, data=None, headers=None): + """ + Args: + method (str): HTTP method to use. + uri (str): URI to query. + data (bytes): Data to send in the request body, if applicable. + headers (t.w.http_headers.Headers): Request headers. + + Raises: + SynapseError: If the IP is blacklisted. 
+ """ # A small wrapper around self.agent.request() so we can easily attach # counters to it outgoing_requests_counter.labels(method).inc() @@ -96,26 +270,39 @@ class SimpleHttpClient(object): logger.info("Sending request %s %s", method, redact_uri(uri)) try: + body_producer = None + if data is not None: + body_producer = QuieterFileBodyProducer(BytesIO(data)) + request_deferred = treq.request( - method, uri, agent=self.agent, data=data, headers=headers + method, + uri, + agent=self.agent, + data=body_producer, + headers=headers, + **self._extra_treq_args ) request_deferred = timeout_deferred( - request_deferred, 60, self.hs.get_reactor(), + request_deferred, + 60, + self.hs.get_reactor(), cancelled_to_request_timed_out_error, ) response = yield make_deferred_yieldable(request_deferred) incoming_responses_counter.labels(method, response.code).inc() logger.info( - "Received response to %s %s: %s", - method, redact_uri(uri), response.code + "Received response to %s %s: %s", method, redact_uri(uri), response.code ) defer.returnValue(response) except Exception as e: incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, redact_uri(uri), type(e).__name__, e.args[0] + method, + redact_uri(uri), + type(e).__name__, + e.args[0], ) raise @@ -140,8 +327,9 @@ class SimpleHttpClient(object): # TODO: Do we ever want to log message contents? 
logger.debug("post_urlencoded_get_json args: %s", args) - query_bytes = urllib.parse.urlencode( - encode_urlencode_args(args), True).encode("utf8") + query_bytes = urllib.parse.urlencode(encode_urlencode_args(args), True).encode( + "utf8" + ) actual_headers = { b"Content-Type": [b"application/x-www-form-urlencoded"], @@ -151,15 +339,13 @@ class SimpleHttpClient(object): actual_headers.update(headers) response = yield self.request( - "POST", - uri, - headers=Headers(actual_headers), - data=query_bytes + "POST", uri, headers=Headers(actual_headers), data=query_bytes ) + body = yield make_deferred_yieldable(readBody(response)) + if 200 <= response.code < 300: - body = yield make_deferred_yieldable(treq.json_content(response)) - defer.returnValue(body) + defer.returnValue(json.loads(body)) else: raise HttpResponseException(response.code, response.phrase, body) @@ -193,10 +379,7 @@ class SimpleHttpClient(object): actual_headers.update(headers) response = yield self.request( - "POST", - uri, - headers=Headers(actual_headers), - data=json_str + "POST", uri, headers=Headers(actual_headers), data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -264,10 +447,7 @@ class SimpleHttpClient(object): actual_headers.update(headers) response = yield self.request( - "PUT", - uri, - headers=Headers(actual_headers), - data=json_str + "PUT", uri, headers=Headers(actual_headers), data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -299,17 +479,11 @@ class SimpleHttpClient(object): query_bytes = urllib.parse.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) - actual_headers = { - b"User-Agent": [self.user_agent], - } + actual_headers = {b"User-Agent": [self.user_agent]} if headers: actual_headers.update(headers) - response = yield self.request( - "GET", - uri, - headers=Headers(actual_headers), - ) + response = yield self.request("GET", uri, headers=Headers(actual_headers)) body = yield make_deferred_yieldable(readBody(response)) @@ 
-334,22 +508,18 @@ class SimpleHttpClient(object): headers, absolute URI of the response and HTTP response code. """ - actual_headers = { - b"User-Agent": [self.user_agent], - } + actual_headers = {b"User-Agent": [self.user_agent]} if headers: actual_headers.update(headers) - response = yield self.request( - "GET", - url, - headers=Headers(actual_headers), - ) + response = yield self.request("GET", url, headers=Headers(actual_headers)) resp_headers = dict(response.headers.getAllRawHeaders()) - if (b'Content-Length' in resp_headers and - int(resp_headers[b'Content-Length']) > max_size): + if ( + b'Content-Length' in resp_headers + and int(resp_headers[b'Content-Length'][0]) > max_size + ): logger.warn("Requested URL is too large > %r bytes" % (self.max_size,)) raise SynapseError( 502, @@ -359,26 +529,20 @@ class SimpleHttpClient(object): if response.code > 299: logger.warn("Got %d when downloading %s" % (response.code, url)) - raise SynapseError( - 502, - "Got error %d" % (response.code,), - Codes.UNKNOWN, - ) + raise SynapseError(502, "Got error %d" % (response.code,), Codes.UNKNOWN) # TODO: if our Content-Type is HTML or something, just read the first # N bytes into RAM rather than saving it all to disk only to read it # straight back in again try: - length = yield make_deferred_yieldable(_readBodyToFile( - response, output_stream, max_size, - )) + length = yield make_deferred_yieldable( + _readBodyToFile(response, output_stream, max_size) + ) except Exception as e: logger.exception("Failed to download body") raise SynapseError( - 502, - ("Failed to download remote body: %s" % e), - Codes.UNKNOWN, + 502, ("Failed to download remote body: %s" % e), Codes.UNKNOWN ) defer.returnValue( @@ -387,13 +551,14 @@ class SimpleHttpClient(object): resp_headers, response.request.absoluteURI.decode('ascii'), response.code, - ), + ) ) # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. # The two should be factored out. 
+ class _ReadBodyToFileProtocol(protocol.Protocol): def __init__(self, stream, deferred, max_size): self.stream = stream @@ -405,11 +570,13 @@ class _ReadBodyToFileProtocol(protocol.Protocol): self.stream.write(data) self.length += len(data) if self.max_size is not None and self.length >= self.max_size: - self.deferred.errback(SynapseError( - 502, - "Requested file is too large > %r bytes" % (self.max_size,), - Codes.TOO_LARGE, - )) + self.deferred.errback( + SynapseError( + 502, + "Requested file is too large > %r bytes" % (self.max_size,), + Codes.TOO_LARGE, + ) + ) self.deferred = defer.Deferred() self.transport.loseConnection() @@ -427,6 +594,7 @@ class _ReadBodyToFileProtocol(protocol.Protocol): # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. # The two should be factored out. + def _readBodyToFile(response, stream, max_size): d = defer.Deferred() response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) @@ -449,10 +617,12 @@ class CaptchaServerHttpClient(SimpleHttpClient): "POST", url, data=query_bytes, - headers=Headers({ - b"Content-Type": [b"application/x-www-form-urlencoded"], - b"User-Agent": [self.user_agent], - }) + headers=Headers( + { + b"Content-Type": [b"application/x-www-form-urlencoded"], + b"User-Agent": [self.user_agent], + } + ), ) try: @@ -463,57 +633,6 @@ class CaptchaServerHttpClient(SimpleHttpClient): defer.returnValue(e.response) -class SpiderEndpointFactory(object): - def __init__(self, hs): - self.blacklist = hs.config.url_preview_ip_range_blacklist - self.whitelist = hs.config.url_preview_ip_range_whitelist - self.policyForHTTPS = hs.get_http_client_context_factory() - - def endpointForURI(self, uri): - logger.info("Getting endpoint for %s", uri.toBytes()) - - if uri.scheme == b"http": - endpoint_factory = HostnameEndpoint - elif uri.scheme == b"https": - tlsCreator = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port) - - def endpoint_factory(reactor, host, port, **kw): - return wrapClientTLS( - 
tlsCreator, - HostnameEndpoint(reactor, host, port, **kw)) - else: - logger.warn("Can't get endpoint for unrecognised scheme %s", uri.scheme) - return None - return SpiderEndpoint( - reactor, uri.host, uri.port, self.blacklist, self.whitelist, - endpoint=endpoint_factory, endpoint_kw_args=dict(timeout=15), - ) - - -class SpiderHttpClient(SimpleHttpClient): - """ - Separate HTTP client for spidering arbitrary URLs. - Special in that it follows retries and has a UA that looks - like a browser. - - used by the preview_url endpoint in the content repo. - """ - def __init__(self, hs): - SimpleHttpClient.__init__(self, hs) - # clobber the base class's agent and UA: - self.agent = ContentDecoderAgent( - BrowserLikeRedirectAgent( - Agent.usingEndpointFactory( - reactor, - SpiderEndpointFactory(hs) - ) - ), [(b'gzip', GzipDecoder)] - ) - # We could look like Chrome: - # self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) - # Chrome Safari" % hs.version_string) - - def encode_urlencode_args(args): return {k: encode_urlencode_arg(v) for k, v in args.items()} diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index 91025037a3..cd79ebab62 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -12,30 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import collections import logging -import random import re -import time - -from twisted.internet import defer -from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS -from twisted.internet.error import ConnectError -from twisted.names import client, dns -from twisted.names.error import DNSNameError, DomainError logger = logging.getLogger(__name__) -SERVER_CACHE = {} - -# our record of an individual server which can be tried to reach a destination. -# -# "host" is the hostname acquired from the SRV record. 
Except when there's -# no SRV record, in which case it is the original hostname. -_Server = collections.namedtuple( - "_Server", "priority weight host port expires" -) - def parse_server_name(server_name): """Split a server name into host/port parts. @@ -100,299 +81,3 @@ def parse_and_validate_server_name(server_name): )) return host, port - - -def matrix_federation_endpoint(reactor, destination, tls_client_options_factory=None, - timeout=None): - """Construct an endpoint for the given matrix destination. - - Args: - reactor: Twisted reactor. - destination (unicode): The name of the server to connect to. - tls_client_options_factory - (synapse.crypto.context_factory.ClientTLSOptionsFactory): - Factory which generates TLS options for client connections. - timeout (int): connection timeout in seconds - """ - - domain, port = parse_server_name(destination) - - endpoint_kw_args = {} - - if timeout is not None: - endpoint_kw_args.update(timeout=timeout) - - if tls_client_options_factory is None: - transport_endpoint = HostnameEndpoint - default_port = 8008 - else: - # the SNI string should be the same as the Host header, minus the port. - # as per https://github.com/matrix-org/synapse/issues/2525#issuecomment-336896777, - # the Host header and SNI should therefore be the server_name of the remote - # server. 
- tls_options = tls_client_options_factory.get_options(domain) - - def transport_endpoint(reactor, host, port, timeout): - return wrapClientTLS( - tls_options, - HostnameEndpoint(reactor, host, port, timeout=timeout), - ) - default_port = 8448 - - if port is None: - return _WrappingEndpointFac(SRVClientEndpoint( - reactor, "matrix", domain, protocol="tcp", - default_port=default_port, endpoint=transport_endpoint, - endpoint_kw_args=endpoint_kw_args - ), reactor) - else: - return _WrappingEndpointFac(transport_endpoint( - reactor, domain, port, **endpoint_kw_args - ), reactor) - - -class _WrappingEndpointFac(object): - def __init__(self, endpoint_fac, reactor): - self.endpoint_fac = endpoint_fac - self.reactor = reactor - - @defer.inlineCallbacks - def connect(self, protocolFactory): - conn = yield self.endpoint_fac.connect(protocolFactory) - conn = _WrappedConnection(conn, self.reactor) - defer.returnValue(conn) - - -class _WrappedConnection(object): - """Wraps a connection and calls abort on it if it hasn't seen any action - for 2.5-3 minutes. - """ - __slots__ = ["conn", "last_request"] - - def __init__(self, conn, reactor): - object.__setattr__(self, "conn", conn) - object.__setattr__(self, "last_request", time.time()) - self._reactor = reactor - - def __getattr__(self, name): - return getattr(self.conn, name) - - def __setattr__(self, name, value): - setattr(self.conn, name, value) - - def _time_things_out_maybe(self): - # We use a slightly shorter timeout here just in case the callLater is - # triggered early. Paranoia ftw. - # TODO: Cancel the previous callLater rather than comparing time.time()? - if time.time() - self.last_request >= 2.5 * 60: - self.abort() - # Abort the underlying TLS connection. The abort() method calls - # loseConnection() on the TLS connection which tries to - # shutdown the connection cleanly. We call abortConnection() - # since that will promptly close the TLS connection. 
- # - # In Twisted >18.4; the TLS connection will be None if it has closed - # which will make abortConnection() throw. Check that the TLS connection - # is not None before trying to close it. - if self.transport.getHandle() is not None: - self.transport.abortConnection() - - def request(self, request): - self.last_request = time.time() - - # Time this connection out if we haven't send a request in the last - # N minutes - # TODO: Cancel the previous callLater? - self._reactor.callLater(3 * 60, self._time_things_out_maybe) - - d = self.conn.request(request) - - def update_request_time(res): - self.last_request = time.time() - # TODO: Cancel the previous callLater? - self._reactor.callLater(3 * 60, self._time_things_out_maybe) - return res - - d.addCallback(update_request_time) - - return d - - -class SpiderEndpoint(object): - """An endpoint which refuses to connect to blacklisted IP addresses - Implements twisted.internet.interfaces.IStreamClientEndpoint. - """ - def __init__(self, reactor, host, port, blacklist, whitelist, - endpoint=HostnameEndpoint, endpoint_kw_args={}): - self.reactor = reactor - self.host = host - self.port = port - self.blacklist = blacklist - self.whitelist = whitelist - self.endpoint = endpoint - self.endpoint_kw_args = endpoint_kw_args - - @defer.inlineCallbacks - def connect(self, protocolFactory): - address = yield self.reactor.resolve(self.host) - - from netaddr import IPAddress - ip_address = IPAddress(address) - - if ip_address in self.blacklist: - if self.whitelist is None or ip_address not in self.whitelist: - raise ConnectError( - "Refusing to spider blacklisted IP address %s" % address - ) - - logger.info("Connecting to %s:%s", address, self.port) - endpoint = self.endpoint( - self.reactor, address, self.port, **self.endpoint_kw_args - ) - connection = yield endpoint.connect(protocolFactory) - defer.returnValue(connection) - - -class SRVClientEndpoint(object): - """An endpoint which looks up SRV records for a service. 
- Cycles through the list of servers starting with each call to connect - picking the next server. - Implements twisted.internet.interfaces.IStreamClientEndpoint. - """ - - def __init__(self, reactor, service, domain, protocol="tcp", - default_port=None, endpoint=HostnameEndpoint, - endpoint_kw_args={}): - self.reactor = reactor - self.service_name = "_%s._%s.%s" % (service, protocol, domain) - - if default_port is not None: - self.default_server = _Server( - host=domain, - port=default_port, - priority=0, - weight=0, - expires=0, - ) - else: - self.default_server = None - - self.endpoint = endpoint - self.endpoint_kw_args = endpoint_kw_args - - self.servers = None - self.used_servers = None - - @defer.inlineCallbacks - def fetch_servers(self): - self.used_servers = [] - self.servers = yield resolve_service(self.service_name) - - def pick_server(self): - if not self.servers: - if self.used_servers: - self.servers = self.used_servers - self.used_servers = [] - self.servers.sort() - elif self.default_server: - return self.default_server - else: - raise ConnectError( - "No server available for %s" % self.service_name - ) - - # look for all servers with the same priority - min_priority = self.servers[0].priority - weight_indexes = list( - (index, server.weight + 1) - for index, server in enumerate(self.servers) - if server.priority == min_priority - ) - - total_weight = sum(weight for index, weight in weight_indexes) - target_weight = random.randint(0, total_weight) - for index, weight in weight_indexes: - target_weight -= weight - if target_weight <= 0: - server = self.servers[index] - # XXX: this looks totally dubious: - # - # (a) we never reuse a server until we have been through - # all of the servers at the same priority, so if the - # weights are A: 100, B:1, we always do ABABAB instead of - # AAAA...AAAB (approximately). - # - # (b) After using all the servers at the lowest priority, - # we move onto the next priority. 
We should only use the - # second priority if servers at the top priority are - # unreachable. - # - del self.servers[index] - self.used_servers.append(server) - return server - - @defer.inlineCallbacks - def connect(self, protocolFactory): - if self.servers is None: - yield self.fetch_servers() - server = self.pick_server() - logger.info("Connecting to %s:%s", server.host, server.port) - endpoint = self.endpoint( - self.reactor, server.host, server.port, **self.endpoint_kw_args - ) - connection = yield endpoint.connect(protocolFactory) - defer.returnValue(connection) - - -@defer.inlineCallbacks -def resolve_service(service_name, dns_client=client, cache=SERVER_CACHE, clock=time): - cache_entry = cache.get(service_name, None) - if cache_entry: - if all(s.expires > int(clock.time()) for s in cache_entry): - servers = list(cache_entry) - defer.returnValue(servers) - - servers = [] - - try: - try: - answers, _, _ = yield dns_client.lookupService(service_name) - except DNSNameError: - defer.returnValue([]) - - if (len(answers) == 1 - and answers[0].type == dns.SRV - and answers[0].payload - and answers[0].payload.target == dns.Name(b'.')): - raise ConnectError("Service %s unavailable" % service_name) - - for answer in answers: - if answer.type != dns.SRV or not answer.payload: - continue - - payload = answer.payload - - servers.append(_Server( - host=str(payload.target), - port=int(payload.port), - priority=int(payload.priority), - weight=int(payload.weight), - expires=int(clock.time()) + answer.ttl, - )) - - servers.sort() - cache[service_name] = list(servers) - except DomainError as e: - # We failed to resolve the name (other than a NameError) - # Try something in the cache, else rereaise - cache_entry = cache.get(service_name, None) - if cache_entry: - logger.warn( - "Failed to resolve %r, falling back to cache. 
%r", - service_name, e - ) - servers = list(cache_entry) - else: - raise e - - defer.returnValue(servers) diff --git a/synapse/rest/key/v1/__init__.py b/synapse/http/federation/__init__.py index fe0ac3f8e9..1453d04571 100644 --- a/synapse/rest/key/v1/__init__.py +++ b/synapse/http/federation/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py new file mode 100644 index 0000000000..384d8a37a2 --- /dev/null +++ b/synapse/http/federation/matrix_federation_agent.py @@ -0,0 +1,452 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import logging +import random +import time + +import attr +from netaddr import IPAddress +from zope.interface import implementer + +from twisted.internet import defer +from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS +from twisted.internet.interfaces import IStreamClientEndpoint +from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent, readBody +from twisted.web.http import stringToDatetime +from twisted.web.http_headers import Headers +from twisted.web.iweb import IAgent + +from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list +from synapse.util import Clock +from synapse.util.caches.ttlcache import TTLCache +from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.metrics import Measure + +# period to cache .well-known results for by default +WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600 + +# jitter to add to the .well-known default cache ttl +WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 10 * 60 + +# period to cache failure to fetch .well-known for +WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 + +# cap for .well-known cache period +WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 + +logger = logging.getLogger(__name__) +well_known_cache = TTLCache('well-known') + + +@implementer(IAgent) +class MatrixFederationAgent(object): + """An Agent-like thing which provides a `request` method which will look up a matrix + server and send an HTTP request to it. + + Doesn't implement any retries. (Those are done in MatrixFederationHttpClient.) + + Args: + reactor (IReactor): twisted reactor to use for underlying requests + + tls_client_options_factory (ClientTLSOptionsFactory|None): + factory to use for fetching client tls options, or none to disable TLS. + + _well_known_tls_policy (IPolicyForHTTPS|None): + TLS policy to use for fetching .well-known files. None to use a default + (browser-like) implementation. 
+ + srv_resolver (SrvResolver|None): + SRVResolver impl to use for looking up SRV records. None to use a default + implementation. + """ + + def __init__( + self, reactor, tls_client_options_factory, + _well_known_tls_policy=None, + _srv_resolver=None, + _well_known_cache=well_known_cache, + ): + self._reactor = reactor + self._clock = Clock(reactor) + + self._tls_client_options_factory = tls_client_options_factory + if _srv_resolver is None: + _srv_resolver = SrvResolver() + self._srv_resolver = _srv_resolver + + self._pool = HTTPConnectionPool(reactor) + self._pool.retryAutomatically = False + self._pool.maxPersistentPerHost = 5 + self._pool.cachedConnectionTimeout = 2 * 60 + + agent_args = {} + if _well_known_tls_policy is not None: + # the param is called 'contextFactory', but actually passing a + # contextfactory is deprecated, and it expects an IPolicyForHTTPS. + agent_args['contextFactory'] = _well_known_tls_policy + _well_known_agent = RedirectAgent( + Agent(self._reactor, pool=self._pool, **agent_args), + ) + self._well_known_agent = _well_known_agent + + # our cache of .well-known lookup results, mapping from server name + # to delegated name. The values can be: + # `bytes`: a valid server-name + # `None`: there is no (valid) .well-known here + self._well_known_cache = _well_known_cache + + @defer.inlineCallbacks + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Args: + method (bytes): HTTP method: GET/POST/etc + + uri (bytes): Absolute URI to be retrieved + + headers (twisted.web.http_headers.Headers|None): + HTTP headers to send with the request, or None to + send no extra headers. + + bodyProducer (twisted.web.iweb.IBodyProducer|None): + An object which can generate bytes to make up the + body of this request (for example, the properly encoded contents of + a file for a file upload). Or None if the request is to have + no body. 
+ + Returns: + Deferred[twisted.web.iweb.IResponse]: + fires when the header of the response has been received (regardless of the + response status code). Fails if there is any problem which prevents that + response from being received (including problems that prevent the request + from being sent). + """ + parsed_uri = URI.fromBytes(uri, defaultPort=-1) + res = yield self._route_matrix_uri(parsed_uri) + + # set up the TLS connection params + # + # XXX disabling TLS is really only supported here for the benefit of the + # unit tests. We should make the UTs cope with TLS rather than having to make + # the code support the unit tests. + if self._tls_client_options_factory is None: + tls_options = None + else: + tls_options = self._tls_client_options_factory.get_options( + res.tls_server_name.decode("ascii") + ) + + # make sure that the Host header is set correctly + if headers is None: + headers = Headers() + else: + headers = headers.copy() + + if not headers.hasHeader(b'host'): + headers.addRawHeader(b'host', res.host_header) + + class EndpointFactory(object): + @staticmethod + def endpointForURI(_uri): + ep = LoggingHostnameEndpoint( + self._reactor, res.target_host, res.target_port, + ) + if tls_options is not None: + ep = wrapClientTLS(tls_options, ep) + return ep + + agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(), self._pool) + res = yield make_deferred_yieldable( + agent.request(method, uri, headers, bodyProducer) + ) + defer.returnValue(res) + + @defer.inlineCallbacks + def _route_matrix_uri(self, parsed_uri, lookup_well_known=True): + """Helper for `request`: determine the routing for a Matrix URI + + Args: + parsed_uri (twisted.web.client.URI): uri to route. Note that it should be + parsed with URI.fromBytes(uri, defaultPort=-1) to set the `port` to -1 + if there is no explicit port given. + + lookup_well_known (bool): True if we should look up the .well-known file if + there is no SRV record. 
+ + Returns: + Deferred[_RoutingResult] + """ + # check for an IP literal + try: + ip_address = IPAddress(parsed_uri.host.decode("ascii")) + except Exception: + # not an IP address + ip_address = None + + if ip_address: + port = parsed_uri.port + if port == -1: + port = 8448 + defer.returnValue(_RoutingResult( + host_header=parsed_uri.netloc, + tls_server_name=parsed_uri.host, + target_host=parsed_uri.host, + target_port=port, + )) + + if parsed_uri.port != -1: + # there is an explicit port + defer.returnValue(_RoutingResult( + host_header=parsed_uri.netloc, + tls_server_name=parsed_uri.host, + target_host=parsed_uri.host, + target_port=parsed_uri.port, + )) + + if lookup_well_known: + # try a .well-known lookup + well_known_server = yield self._get_well_known(parsed_uri.host) + + if well_known_server: + # if we found a .well-known, start again, but don't do another + # .well-known lookup. + + # parse the server name in the .well-known response into host/port. + # (This code is lifted from twisted.web.client.URI.fromBytes). 
+ if b':' in well_known_server: + well_known_host, well_known_port = well_known_server.rsplit(b':', 1) + try: + well_known_port = int(well_known_port) + except ValueError: + # the part after the colon could not be parsed as an int + # - we assume it is an IPv6 literal with no port (the closing + # ']' stops it being parsed as an int) + well_known_host, well_known_port = well_known_server, -1 + else: + well_known_host, well_known_port = well_known_server, -1 + + new_uri = URI( + scheme=parsed_uri.scheme, + netloc=well_known_server, + host=well_known_host, + port=well_known_port, + path=parsed_uri.path, + params=parsed_uri.params, + query=parsed_uri.query, + fragment=parsed_uri.fragment, + ) + + res = yield self._route_matrix_uri(new_uri, lookup_well_known=False) + defer.returnValue(res) + + # try a SRV lookup + service_name = b"_matrix._tcp.%s" % (parsed_uri.host,) + server_list = yield self._srv_resolver.resolve_service(service_name) + + if not server_list: + target_host = parsed_uri.host + port = 8448 + logger.debug( + "No SRV record for %s, using %s:%i", + parsed_uri.host.decode("ascii"), target_host.decode("ascii"), port, + ) + else: + target_host, port = pick_server_from_list(server_list) + logger.debug( + "Picked %s:%i from SRV records for %s", + target_host.decode("ascii"), port, parsed_uri.host.decode("ascii"), + ) + + defer.returnValue(_RoutingResult( + host_header=parsed_uri.netloc, + tls_server_name=parsed_uri.host, + target_host=target_host, + target_port=port, + )) + + @defer.inlineCallbacks + def _get_well_known(self, server_name): + """Attempt to fetch and parse a .well-known file for the given server + + Args: + server_name (bytes): name of the server, from the requested url + + Returns: + Deferred[bytes|None]: either the new server name, from the .well-known, or + None if there was no .well-known file. 
+ """ + try: + result = self._well_known_cache[server_name] + except KeyError: + # TODO: should we linearise so that we don't end up doing two .well-known + # requests for the same server in parallel? + with Measure(self._clock, "get_well_known"): + result, cache_period = yield self._do_get_well_known(server_name) + + if cache_period > 0: + self._well_known_cache.set(server_name, result, cache_period) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _do_get_well_known(self, server_name): + """Actually fetch and parse a .well-known, without checking the cache + + Args: + server_name (bytes): name of the server, from the requested url + + Returns: + Deferred[Tuple[bytes|None|object],int]: + result, cache period, where result is one of: + - the new server name from the .well-known (as a `bytes`) + - None if there was no .well-known file. + - INVALID_WELL_KNOWN if the .well-known was invalid + """ + uri = b"https://%s/.well-known/matrix/server" % (server_name, ) + uri_str = uri.decode("ascii") + logger.info("Fetching %s", uri_str) + try: + response = yield make_deferred_yieldable( + self._well_known_agent.request(b"GET", uri), + ) + body = yield make_deferred_yieldable(readBody(response)) + if response.code != 200: + raise Exception("Non-200 response %s" % (response.code, )) + + parsed_body = json.loads(body.decode('utf-8')) + logger.info("Response from .well-known: %s", parsed_body) + if not isinstance(parsed_body, dict): + raise Exception("not a dict") + if "m.server" not in parsed_body: + raise Exception("Missing key 'm.server'") + except Exception as e: + logger.info("Error fetching %s: %s", uri_str, e) + + # add some randomness to the TTL to avoid a stampeding herd every hour + # after startup + cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD + cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + defer.returnValue((None, cache_period)) + + result = parsed_body["m.server"].encode("ascii") + + cache_period = 
_cache_period_from_headers( + response.headers, + time_now=self._reactor.seconds, + ) + if cache_period is None: + cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD + # add some randomness to the TTL to avoid a stampeding herd every 24 hours + # after startup + cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + else: + cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) + + defer.returnValue((result, cache_period)) + + +@implementer(IStreamClientEndpoint) +class LoggingHostnameEndpoint(object): + """A wrapper for HostnameEndpint which logs when it connects""" + def __init__(self, reactor, host, port, *args, **kwargs): + self.host = host + self.port = port + self.ep = HostnameEndpoint(reactor, host, port, *args, **kwargs) + + def connect(self, protocol_factory): + logger.info("Connecting to %s:%i", self.host.decode("ascii"), self.port) + return self.ep.connect(protocol_factory) + + +def _cache_period_from_headers(headers, time_now=time.time): + cache_controls = _parse_cache_control(headers) + + if b'no-store' in cache_controls: + return 0 + + if b'max-age' in cache_controls: + try: + max_age = int(cache_controls[b'max-age']) + return max_age + except ValueError: + pass + + expires = headers.getRawHeaders(b'expires') + if expires is not None: + try: + expires_date = stringToDatetime(expires[-1]) + return expires_date - time_now() + except ValueError: + # RFC7234 says 'A cache recipient MUST interpret invalid date formats, + # especially the value "0", as representing a time in the past (i.e., + # "already expired"). 
+ return 0 + + return None + + +def _parse_cache_control(headers): + cache_controls = {} + for hdr in headers.getRawHeaders(b'cache-control', []): + for directive in hdr.split(b','): + splits = [x.strip() for x in directive.split(b'=', 1)] + k = splits[0].lower() + v = splits[1] if len(splits) > 1 else None + cache_controls[k] = v + return cache_controls + + +@attr.s +class _RoutingResult(object): + """The result returned by `_route_matrix_uri`. + + Contains the parameters needed to direct a federation connection to a particular + server. + + Where a SRV record points to several servers, this object contains a single server + chosen from the list. + """ + + host_header = attr.ib() + """ + The value we should assign to the Host header (host:port from the matrix + URI, or .well-known). + + :type: bytes + """ + + tls_server_name = attr.ib() + """ + The server name we should set in the SNI (typically host, without port, from the + matrix URI or .well-known) + + :type: bytes + """ + + target_host = attr.ib() + """ + The hostname (or IP literal) we should route the TCP connection to (the target of the + SRV record, or the hostname from the URL/.well-known) + + :type: bytes + """ + + target_port = attr.ib() + """ + The port we should route the TCP connection to (the target of the SRV record, or + the port from the URL/.well-known, or 8448) + + :type: int + """ diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py new file mode 100644 index 0000000000..71830c549d --- /dev/null +++ b/synapse/http/federation/srv_resolver.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import random +import time + +import attr + +from twisted.internet import defer +from twisted.internet.error import ConnectError +from twisted.names import client, dns +from twisted.names.error import DNSNameError, DomainError + +from synapse.util.logcontext import make_deferred_yieldable + +logger = logging.getLogger(__name__) + +SERVER_CACHE = {} + + +@attr.s +class Server(object): + """ + Our record of an individual server which can be tried to reach a destination. + + Attributes: + host (bytes): target hostname + port (int): + priority (int): + weight (int): + expires (int): when the cache should expire this record - in *seconds* since + the epoch + """ + host = attr.ib() + port = attr.ib() + priority = attr.ib(default=0) + weight = attr.ib(default=0) + expires = attr.ib(default=0) + + +def pick_server_from_list(server_list): + """Randomly choose a server from the server list + + Args: + server_list (list[Server]): list of candidate servers + + Returns: + Tuple[bytes, int]: (host, port) pair for the chosen server + """ + if not server_list: + raise RuntimeError("pick_server_from_list called with empty list") + + # TODO: currently we only use the lowest-priority servers. 
We should maintain a + # cache of servers known to be "down" and filter them out + + min_priority = min(s.priority for s in server_list) + eligible_servers = list(s for s in server_list if s.priority == min_priority) + total_weight = sum(s.weight for s in eligible_servers) + target_weight = random.randint(0, total_weight) + + for s in eligible_servers: + target_weight -= s.weight + + if target_weight <= 0: + return s.host, s.port + + # this should be impossible. + raise RuntimeError( + "pick_server_from_list got to end of eligible server list.", + ) + + +class SrvResolver(object): + """Interface to the dns client to do SRV lookups, with result caching. + + The default resolver in twisted.names doesn't do any caching (it has a CacheResolver, + but the cache never gets populated), so we add our own caching layer here. + + Args: + dns_client (twisted.internet.interfaces.IResolver): twisted resolver impl + cache (dict): cache object + get_time (callable): clock implementation. Should return seconds since the epoch + """ + def __init__(self, dns_client=client, cache=SERVER_CACHE, get_time=time.time): + self._dns_client = dns_client + self._cache = cache + self._get_time = get_time + + @defer.inlineCallbacks + def resolve_service(self, service_name): + """Look up a SRV record + + Args: + service_name (bytes): record to look up + + Returns: + Deferred[list[Server]]: + a list of the SRV records, or an empty list if none found + """ + now = int(self._get_time()) + + if not isinstance(service_name, bytes): + raise TypeError("%r is not a byte string" % (service_name,)) + + cache_entry = self._cache.get(service_name, None) + if cache_entry: + if all(s.expires > now for s in cache_entry): + servers = list(cache_entry) + defer.returnValue(servers) + + try: + answers, _, _ = yield make_deferred_yieldable( + self._dns_client.lookupService(service_name), + ) + except DNSNameError: + # TODO: cache this. We can get the SOA out of the exception, and use + # the negative-TTL value. 
+ defer.returnValue([]) + except DomainError as e: + # We failed to resolve the name (other than a NameError) + # Try something in the cache, else rereaise + cache_entry = self._cache.get(service_name, None) + if cache_entry: + logger.warn( + "Failed to resolve %r, falling back to cache. %r", + service_name, e + ) + defer.returnValue(list(cache_entry)) + else: + raise e + + if (len(answers) == 1 + and answers[0].type == dns.SRV + and answers[0].payload + and answers[0].payload.target == dns.Name(b'.')): + raise ConnectError("Service %s unavailable" % service_name) + + servers = [] + + for answer in answers: + if answer.type != dns.SRV or not answer.payload: + continue + + payload = answer.payload + + servers.append(Server( + host=payload.target.name, + port=payload.port, + priority=payload.priority, + weight=payload.weight, + expires=now + answer.ttl, + )) + + self._cache[service_name] = list(servers) + defer.returnValue(servers) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 24b6110c20..1682c9af13 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -19,7 +19,7 @@ import random import sys from io import BytesIO -from six import PY3, string_types +from six import PY3, raise_from, string_types from six.moves import urllib import attr @@ -32,7 +32,6 @@ from twisted.internet import defer, protocol from twisted.internet.error import DNSLookupError from twisted.internet.task import _EPSILON, Cooperator from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool from twisted.web.http_headers import Headers import synapse.metrics @@ -41,9 +40,11 @@ from synapse.api.errors import ( Codes, FederationDeniedError, HttpResponseException, + RequestSendFailed, SynapseError, ) -from synapse.http.endpoint import matrix_federation_endpoint +from synapse.http import QuieterFileBodyProducer +from 
synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.util.async_helpers import timeout_deferred from synapse.util.logcontext import make_deferred_yieldable from synapse.util.metrics import Measure @@ -65,20 +66,6 @@ else: MAXINT = sys.maxint -class MatrixFederationEndpointFactory(object): - def __init__(self, hs): - self.reactor = hs.get_reactor() - self.tls_client_options_factory = hs.tls_client_options_factory - - def endpointForURI(self, uri): - destination = uri.netloc.decode('ascii') - - return matrix_federation_endpoint( - self.reactor, destination, timeout=10, - tls_client_options_factory=self.tls_client_options_factory - ) - - _next_id = 1 @@ -181,17 +168,15 @@ class MatrixFederationHttpClient(object): requests. """ - def __init__(self, hs): + def __init__(self, hs, tls_client_options_factory): self.hs = hs self.signing_key = hs.config.signing_key[0] self.server_name = hs.hostname reactor = hs.get_reactor() - pool = HTTPConnectionPool(reactor) - pool.retryAutomatically = False - pool.maxPersistentPerHost = 5 - pool.cachedConnectionTimeout = 2 * 60 - self.agent = Agent.usingEndpointFactory( - reactor, MatrixFederationEndpointFactory(hs), pool=pool + + self.agent = MatrixFederationAgent( + hs.get_reactor(), + tls_client_options_factory, ) self.clock = hs.get_clock() self._store = hs.get_datastore() @@ -228,19 +213,18 @@ class MatrixFederationHttpClient(object): backoff_on_404 (bool): Back off if we get a 404 Returns: - Deferred: resolves with the http response object on success. - - Fails with ``HttpResponseException``: if we get an HTTP response - code >= 300. - - Fails with ``NotRetryingDestination`` if we are not yet ready - to retry this server. - - Fails with ``FederationDeniedError`` if this destination - is not on our federation whitelist - - (May also fail with plenty of other Exceptions for things like DNS - failures, connection failures, SSL failures.) 
+ Deferred[twisted.web.client.Response]: resolves with the HTTP + response object on success. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + NotRetryingDestination: If we are not yet ready to retry this + server. + FederationDeniedError: If this destination is not on our + federation whitelist + RequestSendFailed: If there were problems connecting to the + remote, due to e.g. DNS failures, connection timeouts etc. """ if timeout: _sec_timeout = timeout / 1000 @@ -271,7 +255,6 @@ class MatrixFederationHttpClient(object): headers_dict = { b"User-Agent": [self.version_string_bytes], - b"Host": [destination_bytes], } with limiter: @@ -298,60 +281,111 @@ class MatrixFederationHttpClient(object): json = request.get_json() if json: headers_dict[b"Content-Type"] = [b"application/json"] - self.sign_request( + auth_headers = self.build_auth_headers( destination_bytes, method_bytes, url_to_sign_bytes, - headers_dict, json, + json, ) data = encode_canonical_json(json) - producer = FileBodyProducer( + producer = QuieterFileBodyProducer( BytesIO(data), cooperator=self._cooperator, ) else: producer = None - self.sign_request( + auth_headers = self.build_auth_headers( destination_bytes, method_bytes, url_to_sign_bytes, - headers_dict, ) + headers_dict[b"Authorization"] = auth_headers + logger.info( - "{%s} [%s] Sending request: %s %s", + "{%s} [%s] Sending request: %s %s; timeout %fs", request.txn_id, request.destination, request.method, - url_str, + url_str, _sec_timeout, ) - # we don't want all the fancy cookie and redirect handling that - # treq.request gives: just use the raw Agent. - request_deferred = self.agent.request( - method_bytes, - url_bytes, - headers=Headers(headers_dict), - bodyProducer=producer, - ) + try: + with Measure(self.clock, "outbound_request"): + # we don't want all the fancy cookie and redirect handling + # that treq.request gives: just use the raw Agent. 
+ request_deferred = self.agent.request( + method_bytes, + url_bytes, + headers=Headers(headers_dict), + bodyProducer=producer, + ) + + request_deferred = timeout_deferred( + request_deferred, + timeout=_sec_timeout, + reactor=self.hs.get_reactor(), + ) + + response = yield request_deferred + except DNSLookupError as e: + raise_from(RequestSendFailed(e, can_retry=retry_on_dns_fail), e) + except Exception as e: + logger.info("Failed to send request: %s", e) + raise_from(RequestSendFailed(e, can_retry=True), e) - request_deferred = timeout_deferred( - request_deferred, - timeout=_sec_timeout, - reactor=self.hs.get_reactor(), + logger.info( + "{%s} [%s] Got response headers: %d %s", + request.txn_id, + request.destination, + response.code, + response.phrase.decode('ascii', errors='replace'), ) - with Measure(self.clock, "outbound_request"): - response = yield make_deferred_yieldable( - request_deferred, + if 200 <= response.code < 300: + pass + else: + # :'( + # Update transactions table? + d = treq.content(response) + d = timeout_deferred( + d, + timeout=_sec_timeout, + reactor=self.hs.get_reactor(), + ) + + try: + body = yield make_deferred_yieldable(d) + except Exception as e: + # Eh, we're already going to raise an exception so lets + # ignore if this fails. 
+ logger.warn( + "{%s} [%s] Failed to get error response: %s %s: %s", + request.txn_id, + request.destination, + request.method, + url_str, + _flatten_response_never_received(e), + ) + body = None + + e = HttpResponseException( + response.code, response.phrase, body ) + # Retry if the error is a 429 (Too Many Requests), + # otherwise just raise a standard HttpResponseException + if response.code == 429: + raise_from(RequestSendFailed(e, can_retry=True), e) + else: + raise e + break - except Exception as e: + except RequestSendFailed as e: logger.warn( "{%s} [%s] Request failed: %s %s: %s", request.txn_id, request.destination, request.method, url_str, - _flatten_response_never_received(e), + _flatten_response_never_received(e.inner_exception), ) - if not retry_on_dns_fail and isinstance(e, DNSLookupError): + if not e.can_retry: raise if retries_left and not timeout: @@ -376,50 +410,36 @@ class MatrixFederationHttpClient(object): else: raise - logger.info( - "{%s} [%s] Got response headers: %d %s", - request.txn_id, - request.destination, - response.code, - response.phrase.decode('ascii', errors='replace'), - ) - - if 200 <= response.code < 300: - pass - else: - # :'( - # Update transactions table? 
- d = treq.content(response) - d = timeout_deferred( - d, - timeout=_sec_timeout, - reactor=self.hs.get_reactor(), - ) - body = yield make_deferred_yieldable(d) - raise HttpResponseException( - response.code, response.phrase, body - ) + except Exception as e: + logger.warn( + "{%s} [%s] Request failed: %s %s: %s", + request.txn_id, + request.destination, + request.method, + url_str, + _flatten_response_never_received(e), + ) + raise defer.returnValue(response) - def sign_request(self, destination, method, url_bytes, headers_dict, - content=None, destination_is=None): + def build_auth_headers( + self, destination, method, url_bytes, content=None, destination_is=None, + ): """ - Signs a request by adding an Authorization header to headers_dict + Builds the Authorization headers for a federation request Args: destination (bytes|None): The desination home server of the request. May be None if the destination is an identity server, in which case destination_is must be non-None. method (bytes): The HTTP method of the request url_bytes (bytes): The URI path of the request - headers_dict (dict[bytes, list[bytes]]): Dictionary of request headers to - append to content (object): The body of the request destination_is (bytes): As 'destination', but if the destination is an identity server Returns: - None + list[bytes]: a list of headers to be added as "Authorization:" headers """ request = { "method": method, @@ -446,8 +466,7 @@ class MatrixFederationHttpClient(object): self.server_name, key, sig, )).encode('ascii') ) - - headers_dict[b"Authorization"] = auth_headers + return auth_headers @defer.inlineCallbacks def put_json(self, destination, path, args={}, data={}, @@ -477,17 +496,18 @@ class MatrixFederationHttpClient(object): requests) Returns: - Deferred: Succeeds when we get a 2xx HTTP response. The result - will be the decoded JSON body. - - Fails with ``HttpResponseException`` if we get an HTTP response - code >= 300. 
- - Fails with ``NotRetryingDestination`` if we are not yet ready - to retry this server. - - Fails with ``FederationDeniedError`` if this destination - is not on our federation whitelist + Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The + result will be the decoded JSON body. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + NotRetryingDestination: If we are not yet ready to retry this + server. + FederationDeniedError: If this destination is not on our + federation whitelist + RequestSendFailed: If there were problems connecting to the + remote, due to e.g. DNS failures, connection timeouts etc. """ request = MatrixFederationRequest( @@ -531,17 +551,18 @@ class MatrixFederationHttpClient(object): try the request anyway. args (dict): query params Returns: - Deferred: Succeeds when we get a 2xx HTTP response. The result - will be the decoded JSON body. - - Fails with ``HttpResponseException`` if we get an HTTP response - code >= 300. - - Fails with ``NotRetryingDestination`` if we are not yet ready - to retry this server. - - Fails with ``FederationDeniedError`` if this destination - is not on our federation whitelist + Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The + result will be the decoded JSON body. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + NotRetryingDestination: If we are not yet ready to retry this + server. + FederationDeniedError: If this destination is not on our + federation whitelist + RequestSendFailed: If there were problems connecting to the + remote, due to e.g. DNS failures, connection timeouts etc. """ request = MatrixFederationRequest( @@ -586,17 +607,18 @@ class MatrixFederationHttpClient(object): ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. Returns: - Deferred: Succeeds when we get a 2xx HTTP response. The result - will be the decoded JSON body. 
- - Fails with ``HttpResponseException`` if we get an HTTP response - code >= 300. - - Fails with ``NotRetryingDestination`` if we are not yet ready - to retry this server. - - Fails with ``FederationDeniedError`` if this destination - is not on our federation whitelist + Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The + result will be the decoded JSON body. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + NotRetryingDestination: If we are not yet ready to retry this + server. + FederationDeniedError: If this destination is not on our + federation whitelist + RequestSendFailed: If there were problems connecting to the + remote, due to e.g. DNS failures, connection timeouts etc. """ logger.debug("get_json args: %s", args) @@ -637,17 +659,18 @@ class MatrixFederationHttpClient(object): ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. Returns: - Deferred: Succeeds when we get a 2xx HTTP response. The result - will be the decoded JSON body. - - Fails with ``HttpResponseException`` if we get an HTTP response - code >= 300. - - Fails with ``NotRetryingDestination`` if we are not yet ready - to retry this server. - - Fails with ``FederationDeniedError`` if this destination - is not on our federation whitelist + Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The + result will be the decoded JSON body. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + NotRetryingDestination: If we are not yet ready to retry this + server. + FederationDeniedError: If this destination is not on our + federation whitelist + RequestSendFailed: If there were problems connecting to the + remote, due to e.g. DNS failures, connection timeouts etc. """ request = MatrixFederationRequest( method="DELETE", @@ -680,18 +703,20 @@ class MatrixFederationHttpClient(object): args (dict): Optional dictionary used to create the query string. 
ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. - Returns: - Deferred: resolves with an (int,dict) tuple of the file length and - a dict of the response headers. - - Fails with ``HttpResponseException`` if we get an HTTP response code - >= 300 - - Fails with ``NotRetryingDestination`` if we are not yet ready - to retry this server. - Fails with ``FederationDeniedError`` if this destination - is not on our federation whitelist + Returns: + Deferred[tuple[int, dict]]: Resolves with an (int,dict) tuple of + the file length and a dict of the response headers. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + NotRetryingDestination: If we are not yet ready to retry this + server. + FederationDeniedError: If this destination is not on our + federation whitelist + RequestSendFailed: If there were problems connecting to the + remote, due to e.g. DNS failures, connection timeouts etc. """ request = MatrixFederationRequest( method="GET", @@ -784,21 +809,21 @@ def check_content_type_is_json(headers): headers (twisted.web.http_headers.Headers): headers to check Raises: - RuntimeError if the + RequestSendFailed: if the Content-Type header is missing or isn't JSON """ c_type = headers.getRawHeaders(b"Content-Type") if c_type is None: - raise RuntimeError( + raise RequestSendFailed(RuntimeError( "No Content-Type header" - ) + ), can_retry=False) c_type = c_type[0].decode('ascii') # only the first header val, options = cgi.parse_header(c_type) if val != "application/json": - raise RuntimeError( + raise RequestSendFailed(RuntimeError( "Content-Type not application/json: was '%s'" % c_type - ) + ), can_retry=False) def encode_query_args(args): diff --git a/synapse/http/server.py b/synapse/http/server.py index b4b25cab19..16fb7935da 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -106,10 +106,10 @@ def wrap_json_request_handler(h): # trace. 
f = failure.Failure() logger.error( - "Failed handle request via %r: %r: %s", - h, + "Failed handle request via %r: %r", + request.request_metrics.name, request, - f.getTraceback().rstrip(), + exc_info=(f.type, f.value, f.getTracebackObject()), ) # Only respond with an error response if we haven't already started # writing, otherwise lets just kill the connection @@ -169,18 +169,18 @@ def _return_html_error(f, request): ) else: logger.error( - "Failed handle request %r: %s", + "Failed handle request %r", request, - f.getTraceback().rstrip(), + exc_info=(f.type, f.value, f.getTracebackObject()), ) else: code = http_client.INTERNAL_SERVER_ERROR msg = "Internal server error" logger.error( - "Failed handle request %r: %s", + "Failed handle request %r", request, - f.getTraceback().rstrip(), + exc_info=(f.type, f.value, f.getTracebackObject()), ) body = HTML_ERROR_TEMPLATE.format( @@ -468,13 +468,13 @@ def set_cors_headers(request): Args: request (twisted.web.http.Request): The http request to add CORs to. """ - request.setHeader("Access-Control-Allow-Origin", "*") + request.setHeader(b"Access-Control-Allow-Origin", b"*") request.setHeader( - "Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS" + b"Access-Control-Allow-Methods", b"GET, POST, PUT, DELETE, OPTIONS" ) request.setHeader( - "Access-Control-Allow-Headers", - "Origin, X-Requested-With, Content-Type, Accept, Authorization" + b"Access-Control-Allow-Headers", + b"Origin, X-Requested-With, Content-Type, Accept, Authorization" ) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index a1e4b88e6d..528125e737 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -121,16 +121,15 @@ def parse_string(request, name, default=None, required=False, Args: request: the twisted HTTP request. - name (bytes/unicode): the name of the query parameter. - default (bytes/unicode|None): value to use if the parameter is absent, + name (bytes|unicode): the name of the query parameter. 
+ default (bytes|unicode|None): value to use if the parameter is absent, defaults to None. Must be bytes if encoding is None. required (bool): whether to raise a 400 SynapseError if the parameter is absent, defaults to False. - allowed_values (list[bytes/unicode]): List of allowed values for the + allowed_values (list[bytes|unicode]): List of allowed values for the string, or None if any value is allowed, defaults to None. Must be the same type as name, if given. - encoding: The encoding to decode the name to, and decode the string - content with. + encoding (str|None): The encoding to decode the string content with. Returns: bytes/unicode|None: A string value or the default. Unicode if encoding diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 59900aa5d1..ef48984fdd 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -274,8 +274,6 @@ pending_calls_metric = Histogram( # Federation Metrics # -sent_edus_counter = Counter("synapse_federation_client_sent_edus", "") - sent_transactions_counter = Counter("synapse_federation_client_sent_transactions", "") events_processed_counter = Counter("synapse_federation_client_events_processed", "") diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 097c844d31..fc9a20ff59 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -79,7 +79,7 @@ class ModuleApi(object): Returns: Deferred: a 2-tuple of (user_id, access_token) """ - reg = self.hs.get_handlers().registration_handler + reg = self.hs.get_registration_handler() return reg.register(localpart=localpart) @defer.inlineCallbacks diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index ecbf364a5e..8bd96b1178 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -84,7 +84,7 @@ def _rule_to_template(rule): templaterule["pattern"] = thecond["pattern"] if unscoped_rule_id: - templaterule['rule_id'] = unscoped_rule_id + 
templaterule['rule_id'] = unscoped_rule_id if 'default' in rule: templaterule['default'] = rule['default'] return templaterule diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index f369124258..50e1007d84 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -85,7 +85,10 @@ class EmailPusher(object): self.timed_call = None def on_new_notifications(self, min_stream_ordering, max_stream_ordering): - self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) + if self.max_stream_ordering: + self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) + else: + self.max_stream_ordering = max_stream_ordering self._start_processing() def on_new_receipts(self, min_stream_id, max_stream_id): diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 6bd703632d..e65f8c63d3 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -32,9 +32,25 @@ if six.PY3: logger = logging.getLogger(__name__) -http_push_processed_counter = Counter("synapse_http_httppusher_http_pushes_processed", "") +http_push_processed_counter = Counter( + "synapse_http_httppusher_http_pushes_processed", + "Number of push notifications successfully sent", +) -http_push_failed_counter = Counter("synapse_http_httppusher_http_pushes_failed", "") +http_push_failed_counter = Counter( + "synapse_http_httppusher_http_pushes_failed", + "Number of push notifications which failed", +) + +http_badges_processed_counter = Counter( + "synapse_http_httppusher_badge_updates_processed", + "Number of badge updates successfully sent", +) + +http_badges_failed_counter = Counter( + "synapse_http_httppusher_badge_updates_failed", + "Number of badge updates which failed", +) class HttpPusher(object): @@ -81,6 +97,11 @@ class HttpPusher(object): pusherdict['pushkey'], ) + if self.data is None: + raise PusherConfigException( + "data can not be null for HTTP pusher" + ) + if 'url' not in self.data: raise 
PusherConfigException( "'url' required in data for HTTP pusher" @@ -311,10 +332,10 @@ class HttpPusher(object): ] } } - if event.type == 'm.room.member': + if event.type == 'm.room.member' and event.is_state(): d['notification']['membership'] = event.content['membership'] d['notification']['user_is_target'] = event.state_key == self.user_id - if self.hs.config.push_include_content and 'content' in event: + if self.hs.config.push_include_content and event.content: d['notification']['content'] = event.content # We no longer send aliases separately, instead, we send the human @@ -333,10 +354,10 @@ class HttpPusher(object): defer.returnValue([]) try: resp = yield self.http_client.post_json_get_json(self.url, notification_dict) - except Exception: - logger.warn( - "Failed to push event %s to %s", - event.event_id, self.name, exc_info=True, + except Exception as e: + logger.warning( + "Failed to push event %s to %s: %s %s", + event.event_id, self.name, type(e), e, ) defer.returnValue(False) rejected = [] @@ -346,6 +367,10 @@ class HttpPusher(object): @defer.inlineCallbacks def _send_badge(self, badge): + """ + Args: + badge (int): number of unread messages + """ logger.info("Sending updated badge count %d to %s", badge, self.name) d = { 'notification': { @@ -366,14 +391,11 @@ class HttpPusher(object): } } try: - resp = yield self.http_client.post_json_get_json(self.url, d) - except Exception: - logger.warn( - "Failed to send badge count to %s", - self.name, exc_info=True, + yield self.http_client.post_json_get_json(self.url, d) + http_badges_processed_counter.inc() + except Exception as e: + logger.warning( + "Failed to send badge count to %s: %s %s", + self.name, type(e), e, ) - defer.returnValue(False) - rejected = [] - if 'rejected' in resp: - rejected = resp['rejected'] - defer.returnValue(rejected) + http_badges_failed_counter.inc() diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 16fb5e8471..1eb5be0957 100644 --- a/synapse/push/mailer.py +++ 
b/synapse/push/mailer.py @@ -26,7 +26,6 @@ import bleach import jinja2 from twisted.internet import defer -from twisted.mail.smtp import sendmail from synapse.api.constants import EventTypes from synapse.api.errors import StoreError @@ -37,6 +36,7 @@ from synapse.push.presentable_names import ( ) from synapse.types import UserID from synapse.util.async_helpers import concurrently_execute +from synapse.util.logcontext import make_deferred_yieldable from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) @@ -85,6 +85,7 @@ class Mailer(object): self.notif_template_html = notif_template_html self.notif_template_text = notif_template_text + self.sendmail = self.hs.get_sendmail() self.store = self.hs.get_datastore() self.macaroon_gen = self.hs.get_macaroon_generator() self.state_handler = self.hs.get_state_handler() @@ -191,17 +192,17 @@ class Mailer(object): multipart_msg.attach(html_part) logger.info("Sending email push notification to %s" % email_address) - # logger.debug(html_text) - yield sendmail( + yield make_deferred_yieldable(self.sendmail( self.hs.config.email_smtp_host, - raw_from, raw_to, multipart_msg.as_string(), + raw_from, raw_to, multipart_msg.as_string().encode('utf8'), + reactor=self.hs.get_reactor(), port=self.hs.config.email_smtp_port, requireAuthentication=self.hs.config.email_smtp_user is not None, username=self.hs.config.email_smtp_user, password=self.hs.config.email_smtp_pass, requireTransportSecurity=self.hs.config.require_transport_security - ) + )) @defer.inlineCallbacks def get_room_vars(self, room_id, user_id, notifs, notif_events, room_state_ids): @@ -333,7 +334,7 @@ class Mailer(object): notif_events, user_id, reason): if len(notifs_by_room) == 1: # Only one room has new stuff - room_id = notifs_by_room.keys()[0] + room_id = list(notifs_by_room.keys())[0] # If the room has some kind of name, use it, but we don't # want the generated-from-names one here otherwise we'll diff --git 
a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 2bd321d530..cf6c8b875e 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -124,7 +124,7 @@ class PushRuleEvaluatorForEvent(object): # XXX: optimisation: cache our pattern regexps if condition['key'] == 'content.body': - body = self._event["content"].get("body", None) + body = self._event.content.get("body", None) if not body: return False @@ -140,7 +140,7 @@ class PushRuleEvaluatorForEvent(object): if not display_name: return False - body = self._event["content"].get("body", None) + body = self._event.content.get("body", None) if not body: return False diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index fcee6d9d7e..b33f2a357b 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -52,11 +52,12 @@ class PusherFactory(object): logger.info("defined email pusher type") def create_pusher(self, pusherdict): - logger.info("trying to create_pusher for %r", pusherdict) - - if pusherdict['kind'] in self.pusher_types: - logger.info("found pusher") - return self.pusher_types[pusherdict['kind']](self.hs, pusherdict) + kind = pusherdict['kind'] + f = self.pusher_types.get(kind, None) + if not f: + return None + logger.debug("creating %s pusher for %r", kind, pusherdict) + return f(self.hs, pusherdict) def _create_email_pusher(self, _hs, pusherdict): app_name = self._app_name_from_pusherdict(pusherdict) diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 5a4e73ccd6..abf1a1a9c1 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -19,6 +19,7 @@ import logging from twisted.internet import defer from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.push import PusherConfigException from synapse.push.pusher import PusherFactory logger = logging.getLogger(__name__) @@ -140,6 +141,10 @@ class PusherPool: @defer.inlineCallbacks def 
on_new_notifications(self, min_stream_id, max_stream_id): + if not self.pushers: + # nothing to do here. + return + try: users_affected = yield self.store.get_push_action_users_in_range( min_stream_id, max_stream_id @@ -155,6 +160,10 @@ class PusherPool: @defer.inlineCallbacks def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids): + if not self.pushers: + # nothing to do here. + return + try: # Need to subtract 1 from the minimum because the lower bound here # is not inclusive @@ -214,6 +223,15 @@ class PusherPool: """ try: p = self.pusher_factory.create_pusher(pusherdict) + except PusherConfigException as e: + logger.warning( + "Pusher incorrectly configured user=%s, appid=%s, pushkey=%s: %s", + pusherdict.get('user_name'), + pusherdict.get('app_id'), + pusherdict.get('pushkey'), + e, + ) + return except Exception: logger.exception("Couldn't start a pusher: caught Exception") return diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 943876456b..f71e21ff4d 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -15,177 +15,153 @@ # limitations under the License. import logging -from distutils.version import LooseVersion + +from pkg_resources import DistributionNotFound, VersionConflict, get_distribution logger = logging.getLogger(__name__) -# this dict maps from python package name to a list of modules we expect it to -# provide. -# -# the key is a "requirement specifier", as used as a parameter to `pip -# install`[1], or an `install_requires` argument to `setuptools.setup` [2]. + +# REQUIREMENTS is a simple list of requirement specifiers[1], and must be +# installed. It is passed to setup() as install_requires in setup.py. # -# the value is a sequence of strings; each entry should be the name of the -# python module, optionally followed by a version assertion which can be either -# ">=<ver>" or "==<ver>". 
+# CONDITIONAL_REQUIREMENTS is the optional dependencies, represented as a dict +# of lists. The dict key is the optional dependency name and can be passed to +# pip when installing. The list is a series of requirement specifiers[1] to be +# installed when that optional dependency requirement is specified. It is passed +# to setup() as extras_require in setup.py # # [1] https://pip.pypa.io/en/stable/reference/pip_install/#requirement-specifiers. -# [2] https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-dependencies -REQUIREMENTS = { - "jsonschema>=2.5.1": ["jsonschema>=2.5.1"], - "frozendict>=1": ["frozendict"], - "unpaddedbase64>=1.1.0": ["unpaddedbase64>=1.1.0"], - "canonicaljson>=1.1.3": ["canonicaljson>=1.1.3"], - "signedjson>=1.0.0": ["signedjson>=1.0.0"], - "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"], - "service_identity>=16.0.0": ["service_identity>=16.0.0"], - "Twisted>=17.1.0": ["twisted>=17.1.0"], - "treq>=15.1": ["treq>=15.1"], - # Twisted has required pyopenssl 16.0 since about Twisted 16.6. - "pyopenssl>=16.0.0": ["OpenSSL>=16.0.0"], - - "pyyaml>=3.11": ["yaml"], - "pyasn1>=0.1.9": ["pyasn1"], - "pyasn1-modules>=0.0.7": ["pyasn1_modules"], - "daemonize>=2.3.1": ["daemonize"], - "bcrypt>=3.1.0": ["bcrypt>=3.1.0"], - "pillow>=3.1.2": ["PIL"], - "pydenticon>=0.2": ["pydenticon"], - "sortedcontainers>=1.4.4": ["sortedcontainers"], - "psutil>=2.0.0": ["psutil>=2.0.0"], - "pysaml2>=3.0.0": ["saml2"], - "pymacaroons-pynacl>=0.9.3": ["pymacaroons"], - "msgpack-python>=0.4.2": ["msgpack"], - "phonenumbers>=8.2.0": ["phonenumbers"], - "six>=1.10": ["six"], +REQUIREMENTS = [ + "jsonschema>=2.5.1", + "frozendict>=1", + "unpaddedbase64>=1.1.0", + "canonicaljson>=1.1.3", + "signedjson>=1.0.0", + "pynacl>=1.2.1", + "service_identity>=16.0.0", + + # our logcontext handling relies on the ability to cancel inlineCallbacks + # (https://twistedmatrix.com/trac/ticket/4632) which landed in Twisted 18.7. 
+ "Twisted>=18.7.0", + "treq>=15.1", + # Twisted has required pyopenssl 16.0 since about Twisted 16.6. + "pyopenssl>=16.0.0", + "pyyaml>=3.11", + "pyasn1>=0.1.9", + "pyasn1-modules>=0.0.7", + "daemonize>=2.3.1", + "bcrypt>=3.1.0", + "pillow>=3.1.2", + "sortedcontainers>=1.4.4", + "psutil>=2.0.0", + "pymacaroons>=0.13.0", + "msgpack>=0.5.0", + "phonenumbers>=8.2.0", + "six>=1.10", # prometheus_client 0.4.0 changed the format of counter metrics # (cf https://github.com/matrix-org/synapse/issues/4001) - "prometheus_client>=0.0.18,<0.4.0": ["prometheus_client"], + "prometheus_client>=0.0.18,<0.4.0", # we use attr.s(slots), which arrived in 16.0.0 - "attrs>=16.0.0": ["attr>=16.0.0"], - "netaddr>=0.7.18": ["netaddr"], -} - -CONDITIONAL_REQUIREMENTS = { - "web_client": { - "matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"], - }, - "email.enable_notifs": { - "Jinja2>=2.8": ["Jinja2>=2.8"], - "bleach>=1.4.2": ["bleach>=1.4.2"], - }, - "matrix-synapse-ldap3": { - "matrix-synapse-ldap3>=0.1": ["ldap_auth_provider"], - }, - "postgres": { - "psycopg2>=2.6": ["psycopg2"] - } -} + # Twisted 18.7.0 requires attrs>=17.4.0 + "attrs>=17.4.0", + "netaddr>=0.7.18", +] -def requirements(config=None, include_conditional=False): - reqs = REQUIREMENTS.copy() - if include_conditional: - for _, req in CONDITIONAL_REQUIREMENTS.items(): - reqs.update(req) - return reqs - +CONDITIONAL_REQUIREMENTS = { + "email.enable_notifs": ["Jinja2>=2.9", "bleach>=1.4.2"], + "matrix-synapse-ldap3": ["matrix-synapse-ldap3>=0.1"], + "postgres": ["psycopg2>=2.6"], -def github_link(project, version, egg): - return "https://github.com/%s/tarball/%s/#egg=%s" % (project, version, egg) + # ConsentResource uses select_autoescape, which arrived in jinja 2.9 + "resources.consent": ["Jinja2>=2.9"], + # ACME support is required to provision TLS certificates from authorities + # that use the protocol, such as Let's Encrypt. 
+ "acme": ["txacme>=0.9.2"], -DEPENDENCY_LINKS = { + "saml2": ["pysaml2>=4.5.0"], + "url_preview": ["lxml>=3.5.0"], + "test": ["mock>=2.0", "parameterized"], + "sentry": ["sentry-sdk>=0.7.2"], } -class MissingRequirementError(Exception): - def __init__(self, message, module_name, dependency): - super(MissingRequirementError, self).__init__(message) - self.module_name = module_name - self.dependency = dependency - - -def check_requirements(config=None): - """Checks that all the modules needed by synapse have been correctly - installed and are at the correct version""" - for dependency, module_requirements in ( - requirements(config, include_conditional=False).items()): - for module_requirement in module_requirements: - if ">=" in module_requirement: - module_name, required_version = module_requirement.split(">=") - version_test = ">=" - elif "==" in module_requirement: - module_name, required_version = module_requirement.split("==") - version_test = "==" - else: - module_name = module_requirement - version_test = None +def list_requirements(): + deps = set(REQUIREMENTS) + for opt in CONDITIONAL_REQUIREMENTS.values(): + deps = set(opt) | deps + + return list(deps) + + +class DependencyException(Exception): + @property + def message(self): + return "\n".join([ + "Missing Requirements: %s" % (", ".join(self.dependencies),), + "To install run:", + " pip install --upgrade --force %s" % (" ".join(self.dependencies),), + "", + ]) + + @property + def dependencies(self): + for i in self.args[0]: + yield '"' + i + '"' + + +def check_requirements(for_feature=None, _get_distribution=get_distribution): + deps_needed = [] + errors = [] + + if for_feature: + reqs = CONDITIONAL_REQUIREMENTS[for_feature] + else: + reqs = REQUIREMENTS + + for dependency in reqs: + try: + _get_distribution(dependency) + except VersionConflict as e: + deps_needed.append(dependency) + errors.append( + "Needed %s, got %s==%s" + % (dependency, e.dist.project_name, e.dist.version) + ) + except 
DistributionNotFound: + deps_needed.append(dependency) + errors.append("Needed %s but it was not installed" % (dependency,)) + + if not for_feature: + # Check the optional dependencies are up to date. We allow them to not be + # installed. + OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) + for dependency in OPTS: try: - module = __import__(module_name) - except ImportError: - logging.exception( - "Can't import %r which is part of %r", - module_name, dependency + _get_distribution(dependency) + except VersionConflict as e: + deps_needed.append(dependency) + errors.append( + "Needed optional %s, got %s==%s" + % (dependency, e.dist.project_name, e.dist.version) ) - raise MissingRequirementError( - "Can't import %r which is part of %r" - % (module_name, dependency), module_name, dependency - ) - version = getattr(module, "__version__", None) - file_path = getattr(module, "__file__", None) - logger.info( - "Using %r version %r from %r to satisfy %r", - module_name, version, file_path, dependency - ) + except DistributionNotFound: + # If it's not found, we don't care + pass - if version_test == ">=": - if version is None: - raise MissingRequirementError( - "Version of %r isn't set as __version__ of module %r" - % (dependency, module_name), module_name, dependency - ) - if LooseVersion(version) < LooseVersion(required_version): - raise MissingRequirementError( - "Version of %r in %r is too old. %r < %r" - % (dependency, file_path, version, required_version), - module_name, dependency - ) - elif version_test == "==": - if version is None: - raise MissingRequirementError( - "Version of %r isn't set as __version__ of module %r" - % (dependency, module_name), module_name, dependency - ) - if LooseVersion(version) != LooseVersion(required_version): - raise MissingRequirementError( - "Unexpected version of %r in %r. 
%r != %r" - % (dependency, file_path, version, required_version), - module_name, dependency - ) + if deps_needed: + for e in errors: + logging.error(e) - -def list_requirements(): - result = [] - linked = [] - for link in DEPENDENCY_LINKS.values(): - egg = link.split("#egg=")[1] - linked.append(egg.split('-')[0]) - result.append(link) - for requirement in requirements(include_conditional=True): - is_linked = False - for link in linked: - if requirement.replace('-', '_').startswith(link): - is_linked = True - if not is_linked: - result.append(requirement) - return result + raise DependencyException(deps_needed) if __name__ == "__main__": import sys + sys.stdout.writelines(req + "\n" for req in list_requirements()) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index 19f214281e..81b85352b1 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. from synapse.http.server import JsonResource -from synapse.replication.http import federation, membership, send_event +from synapse.replication.http import federation, login, membership, register, send_event REPLICATION_PREFIX = "/_synapse/replication" @@ -28,3 +28,5 @@ class ReplicationRestResource(JsonResource): send_event.register_servlets(hs, self) membership.register_servlets(hs, self) federation.register_servlets(hs, self) + login.register_servlets(hs, self) + register.register_servlets(hs, self) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 5e5376cf58..e81456ab2b 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -127,7 +127,10 @@ class ReplicationEndpoint(object): def send_request(**kwargs): data = yield cls._serialize_payload(**kwargs) - url_args = [urllib.parse.quote(kwargs[name]) for name in cls.PATH_ARGS] + url_args = [ + urllib.parse.quote(kwargs[name], safe='') + for name in cls.PATH_ARGS + ] if 
cls.CACHE: txn_id = random_string(10) diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 64a79da162..0f0a07c422 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -70,6 +70,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_payloads.append({ "event": event.get_pdu_json(), + "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -94,9 +95,12 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_and_contexts = [] for event_payload in event_payloads: event_dict = event_payload["event"] + format_ver = event_payload["event_format_version"] internal_metadata = event_payload["internal_metadata"] rejected_reason = event_payload["rejected_reason"] - event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + EventType = event_type_from_format_version(format_ver) + event = EventType(event_dict, internal_metadata, rejected_reason) context = yield EventContext.deserialize( self.store, event_payload["context"], diff --git a/synapse/replication/http/login.py b/synapse/replication/http/login.py new file mode 100644 index 0000000000..63bc0405ea --- /dev/null +++ b/synapse/replication/http/login.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.http.servlet import parse_json_object_from_request +from synapse.replication.http._base import ReplicationEndpoint + +logger = logging.getLogger(__name__) + + +class RegisterDeviceReplicationServlet(ReplicationEndpoint): + """Ensure a device is registered, generating a new access token for the + device. + + Used during registration and login. + """ + + NAME = "device_check_registered" + PATH_ARGS = ("user_id",) + + def __init__(self, hs): + super(RegisterDeviceReplicationServlet, self).__init__(hs) + self.registration_handler = hs.get_registration_handler() + + @staticmethod + def _serialize_payload(user_id, device_id, initial_display_name, is_guest): + """ + Args: + device_id (str|None): Device ID to use, if None a new one is + generated. 
+ initial_display_name (str|None) + is_guest (bool) + """ + return { + "device_id": device_id, + "initial_display_name": initial_display_name, + "is_guest": is_guest, + } + + @defer.inlineCallbacks + def _handle_request(self, request, user_id): + content = parse_json_object_from_request(request) + + device_id = content["device_id"] + initial_display_name = content["initial_display_name"] + is_guest = content["is_guest"] + + device_id, access_token = yield self.registration_handler.register_device( + user_id, device_id, initial_display_name, is_guest, + ) + + defer.returnValue((200, { + "device_id": device_id, + "access_token": access_token, + })) + + +def register_servlets(hs, http_server): + RegisterDeviceReplicationServlet(hs).register(http_server) diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index e58bebf12a..81a2b204c7 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -191,7 +191,7 @@ class ReplicationRegister3PIDGuestRestServlet(ReplicationEndpoint): def __init__(self, hs): super(ReplicationRegister3PIDGuestRestServlet, self).__init__(hs) - self.registeration_handler = hs.get_handlers().registration_handler + self.registeration_handler = hs.get_registration_handler() self.store = hs.get_datastore() self.clock = hs.get_clock() @@ -251,7 +251,7 @@ class ReplicationUserJoinedLeftRoomRestServlet(ReplicationEndpoint): def __init__(self, hs): super(ReplicationUserJoinedLeftRoomRestServlet, self).__init__(hs) - self.registeration_handler = hs.get_handlers().registration_handler + self.registeration_handler = hs.get_registration_handler() self.store = hs.get_datastore() self.clock = hs.get_clock() self.distributor = hs.get_distributor() diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py new file mode 100644 index 0000000000..1d27c9221f --- /dev/null +++ b/synapse/replication/http/register.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- 
+# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.http.servlet import parse_json_object_from_request +from synapse.replication.http._base import ReplicationEndpoint + +logger = logging.getLogger(__name__) + + +class ReplicationRegisterServlet(ReplicationEndpoint): + """Register a new user + """ + + NAME = "register_user" + PATH_ARGS = ("user_id",) + + def __init__(self, hs): + super(ReplicationRegisterServlet, self).__init__(hs) + self.store = hs.get_datastore() + + @staticmethod + def _serialize_payload( + user_id, token, password_hash, was_guest, make_guest, appservice_id, + create_profile_with_displayname, admin, user_type, + ): + """ + Args: + user_id (str): The desired user ID to register. + token (str): The desired access token to use for this user. If this + is not None, the given access token is associated with the user + id. + password_hash (str|None): Optional. The password hash for this user. + was_guest (bool): Optional. Whether this is a guest account being + upgraded to a non-guest account. + make_guest (boolean): True if the the new user should be guest, + false to add a regular user account. + appservice_id (str|None): The ID of the appservice registering the user. + create_profile_with_displayname (unicode|None): Optionally create a + profile for the user, setting their displayname to the given value + admin (boolean): is an admin user? 
+ user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. + """ + return { + "token": token, + "password_hash": password_hash, + "was_guest": was_guest, + "make_guest": make_guest, + "appservice_id": appservice_id, + "create_profile_with_displayname": create_profile_with_displayname, + "admin": admin, + "user_type": user_type, + } + + @defer.inlineCallbacks + def _handle_request(self, request, user_id): + content = parse_json_object_from_request(request) + + yield self.store.register( + user_id=user_id, + token=content["token"], + password_hash=content["password_hash"], + was_guest=content["was_guest"], + make_guest=content["make_guest"], + appservice_id=content["appservice_id"], + create_profile_with_displayname=content["create_profile_with_displayname"], + admin=content["admin"], + user_type=content["user_type"], + ) + + defer.returnValue((200, {})) + + +class ReplicationPostRegisterActionsServlet(ReplicationEndpoint): + """Run any post registration actions + """ + + NAME = "post_register" + PATH_ARGS = ("user_id",) + + def __init__(self, hs): + super(ReplicationPostRegisterActionsServlet, self).__init__(hs) + self.store = hs.get_datastore() + self.registration_handler = hs.get_registration_handler() + + @staticmethod + def _serialize_payload(user_id, auth_result, access_token, bind_email, + bind_msisdn): + """ + Args: + user_id (str): The user ID that consented + auth_result (dict): The authenticated credentials of the newly + registered user. + access_token (str|None): The access token of the newly logged in + device, or None if `inhibit_login` enabled. 
+ bind_email (bool): Whether to bind the email with the identity + server + bind_msisdn (bool): Whether to bind the msisdn with the identity + server + """ + return { + "auth_result": auth_result, + "access_token": access_token, + "bind_email": bind_email, + "bind_msisdn": bind_msisdn, + } + + @defer.inlineCallbacks + def _handle_request(self, request, user_id): + content = parse_json_object_from_request(request) + + auth_result = content["auth_result"] + access_token = content["access_token"] + bind_email = content["bind_email"] + bind_msisdn = content["bind_msisdn"] + + yield self.registration_handler.post_registration_actions( + user_id=user_id, + auth_result=auth_result, + access_token=access_token, + bind_email=bind_email, + bind_msisdn=bind_msisdn, + ) + + defer.returnValue((200, {})) + + +def register_servlets(hs, http_server): + ReplicationRegisterServlet(hs).register(http_server) + ReplicationPostRegisterActionsServlet(hs).register(http_server) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 5b52c91650..3635015eda 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -74,6 +74,7 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): payload = { "event": event.get_pdu_json(), + "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -90,9 +91,12 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): content = parse_json_object_from_request(request) event_dict = content["event"] + 
format_ver = content["event_format_version"] internal_metadata = content["internal_metadata"] rejected_reason = content["rejected_reason"] - event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + EventType = event_type_from_format_version(format_ver) + event = EventType(event_dict, internal_metadata, rejected_reason) requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py index 2d81d49e9a..817d1f67f9 100644 --- a/synapse/replication/slave/storage/_base.py +++ b/synapse/replication/slave/storage/_base.py @@ -17,7 +17,7 @@ import logging import six -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import _CURRENT_STATE_CACHE_NAME, SQLBaseStore from synapse.storage.engines import PostgresEngine from ._slaved_id_tracker import SlavedIdTracker @@ -54,12 +54,12 @@ class BaseSlavedStore(SQLBaseStore): if stream_name == "caches": self._cache_id_gen.advance(token) for row in rows: - try: - getattr(self, row.cache_func).invalidate(tuple(row.keys)) - except AttributeError: - # We probably haven't pulled in the cache in this worker, - # which is fine. - pass + if row.cache_func == _CURRENT_STATE_CACHE_NAME: + room_id = row.keys[0] + members_changed = set(row.keys[1:]) + self._invalidate_state_caches(room_id, members_changed) + else: + self._attempt_to_invalidate_cache(row.cache_func, tuple(row.keys)) def _invalidate_cache_and_stream(self, txn, cache_func, keys): txn.call_after(cache_func.invalidate, keys) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index cbe9645817..586dddb40b 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -106,7 +106,7 @@ class ReplicationClientHandler(object): Can be overriden in subclasses to handle more. 
""" - logger.info("Received rdata %s -> %s", stream_name, token) + logger.debug("Received rdata %s -> %s", stream_name, token) return self.store.process_replication_rows(stream_name, token, rows) def on_position(self, stream_name, token): diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 5dc7b3fffc..0b3fe6cbf5 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -656,7 +656,7 @@ tcp_inbound_commands = LaterGauge( "", ["command", "name"], lambda: { - (k[0], p.name,): count + (k, p.name,): count for p in connected_connections for k, count in iteritems(p.inbound_commands_counter) }, @@ -667,7 +667,7 @@ tcp_outbound_commands = LaterGauge( "", ["command", "name"], lambda: { - (k[0], p.name,): count + (k, p.name,): count for p in connected_connections for k, count in iteritems(p.outbound_commands_counter) }, diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 4856822a5d..91f5247d52 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from six import PY3 - from synapse.http.server import JsonResource from synapse.rest.client import versions from synapse.rest.client.v1 import ( @@ -36,6 +34,7 @@ from synapse.rest.client.v2_alpha import ( account, account_data, auth, + capabilities, devices, filter, groups, @@ -47,6 +46,7 @@ from synapse.rest.client.v2_alpha import ( register, report_event, room_keys, + room_upgrade_rest_servlet, sendtodevice, sync, tags, @@ -55,11 +55,6 @@ from synapse.rest.client.v2_alpha import ( user_directory, ) -if not PY3: - from synapse.rest.client.v1_only import ( - register as v1_register, - ) - class ClientRestResource(JsonResource): """A resource for version 1 of the matrix client API.""" @@ -72,10 +67,6 @@ class ClientRestResource(JsonResource): def register_servlets(client_resource, hs): versions.register_servlets(client_resource) - if not PY3: - # "v1" (Python 2 only) - v1_register.register_servlets(hs, client_resource) - # Deprecated in r0 initial_sync.register_servlets(hs, client_resource) room.register_deprecated_servlets(hs, client_resource) @@ -116,3 +107,5 @@ class ClientRestResource(JsonResource): sendtodevice.register_servlets(hs, client_resource) user_directory.register_servlets(hs, client_resource) groups.register_servlets(hs, client_resource) + room_upgrade_rest_servlet.register_servlets(hs, client_resource) + capabilities.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 41534b8c2a..82433a2aa9 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -23,7 +23,7 @@ from six.moves import http_client from twisted.internet import defer -from synapse.api.constants import Membership +from synapse.api.constants import Membership, UserTypes from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( assert_params_in_dict, @@ -158,6 +158,11 @@ class UserRegisterServlet(ClientV1RestServlet): raise 
SynapseError(400, "Invalid password") admin = body.get("admin", None) + user_type = body.get("user_type", None) + + if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: + raise SynapseError(400, "Invalid user type") + got_mac = body["mac"] want_mac = hmac.new( @@ -171,6 +176,9 @@ class UserRegisterServlet(ClientV1RestServlet): want_mac.update(password) want_mac.update(b"\x00") want_mac.update(b"admin" if admin else b"notadmin") + if user_type: + want_mac.update(b"\x00") + want_mac.update(user_type.encode('utf8')) want_mac = want_mac.hexdigest() if not hmac.compare_digest( @@ -189,6 +197,7 @@ class UserRegisterServlet(ClientV1RestServlet): password=body["password"], admin=bool(admin), generate_token=False, + user_type=user_type, ) result = yield register._create_registration_details(user_id, body) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 0010699d31..6121c5b6df 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -18,17 +18,18 @@ import xml.etree.ElementTree as ET from six.moves import urllib -from canonicaljson import json -from saml2 import BINDING_HTTP_POST, config -from saml2.client import Saml2Client - from twisted.internet import defer from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError from synapse.http.server import finish_request -from synapse.http.servlet import parse_json_object_from_request -from synapse.types import UserID +from synapse.http.servlet import ( + RestServlet, + parse_json_object_from_request, + parse_string, +) +from synapse.rest.well_known import WellKnownBuilder +from synapse.types import UserID, map_username_to_mxid_localpart from synapse.util.msisdn import phone_number_to_msisdn from .base import ClientV1RestServlet, client_path_patterns @@ -81,30 +82,31 @@ def login_id_thirdparty_from_phone(identifier): class LoginRestServlet(ClientV1RestServlet): PATTERNS = 
client_path_patterns("/login$") - SAML2_TYPE = "m.login.saml2" CAS_TYPE = "m.login.cas" + SSO_TYPE = "m.login.sso" TOKEN_TYPE = "m.login.token" JWT_TYPE = "m.login.jwt" def __init__(self, hs): super(LoginRestServlet, self).__init__(hs) - self.idp_redirect_url = hs.config.saml2_idp_redirect_url - self.saml2_enabled = hs.config.saml2_enabled self.jwt_enabled = hs.config.jwt_enabled self.jwt_secret = hs.config.jwt_secret self.jwt_algorithm = hs.config.jwt_algorithm self.cas_enabled = hs.config.cas_enabled self.auth_handler = self.hs.get_auth_handler() - self.device_handler = self.hs.get_device_handler() + self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() + self._well_known_builder = WellKnownBuilder(hs) def on_GET(self, request): flows = [] if self.jwt_enabled: flows.append({"type": LoginRestServlet.JWT_TYPE}) - if self.saml2_enabled: - flows.append({"type": LoginRestServlet.SAML2_TYPE}) if self.cas_enabled: + flows.append({"type": LoginRestServlet.SSO_TYPE}) + + # we advertise CAS for backwards compat, though MSC1721 renamed it + # to SSO. 
flows.append({"type": LoginRestServlet.CAS_TYPE}) # While its valid for us to advertise this login type generally, @@ -129,29 +131,21 @@ class LoginRestServlet(ClientV1RestServlet): def on_POST(self, request): login_submission = parse_json_object_from_request(request) try: - if self.saml2_enabled and (login_submission["type"] == - LoginRestServlet.SAML2_TYPE): - relay_state = "" - if "relay_state" in login_submission: - relay_state = "&RelayState=" + urllib.parse.quote( - login_submission["relay_state"]) - result = { - "uri": "%s%s" % (self.idp_redirect_url, relay_state) - } - defer.returnValue((200, result)) - elif self.jwt_enabled and (login_submission["type"] == - LoginRestServlet.JWT_TYPE): + if self.jwt_enabled and (login_submission["type"] == + LoginRestServlet.JWT_TYPE): result = yield self.do_jwt_login(login_submission) - defer.returnValue(result) elif login_submission["type"] == LoginRestServlet.TOKEN_TYPE: result = yield self.do_token_login(login_submission) - defer.returnValue(result) else: result = yield self._do_other_login(login_submission) - defer.returnValue(result) except KeyError: raise SynapseError(400, "Missing JSON keys.") + well_known_data = self._well_known_builder.get_well_known() + if well_known_data: + result["well_known"] = well_known_data + defer.returnValue((200, result)) + @defer.inlineCallbacks def _do_other_login(self, login_submission): """Handle non-token/saml/jwt logins @@ -160,7 +154,7 @@ class LoginRestServlet(ClientV1RestServlet): login_submission: Returns: - (int, object): HTTP code/response + dict: HTTP response """ # Log the request we got, but only certain fields to minimise the chance of # logging someone's password (even if they accidentally put it in the wrong @@ -226,11 +220,10 @@ class LoginRestServlet(ClientV1RestServlet): login_submission, ) - device_id = yield self._register_device( - canonical_user_id, login_submission, - ) - access_token = yield auth_handler.get_access_token_for_user_id( - canonical_user_id, 
device_id, + device_id = login_submission.get("device_id") + initial_display_name = login_submission.get("initial_device_display_name") + device_id, access_token = yield self.registration_handler.register_device( + canonical_user_id, device_id, initial_display_name, ) result = { @@ -243,7 +236,7 @@ class LoginRestServlet(ClientV1RestServlet): if callback is not None: yield callback(result) - defer.returnValue((200, result)) + defer.returnValue(result) @defer.inlineCallbacks def do_token_login(self, login_submission): @@ -252,10 +245,13 @@ class LoginRestServlet(ClientV1RestServlet): user_id = ( yield auth_handler.validate_short_term_login_token_and_get_user_id(token) ) - device_id = yield self._register_device(user_id, login_submission) - access_token = yield auth_handler.get_access_token_for_user_id( - user_id, device_id, + + device_id = login_submission.get("device_id") + initial_display_name = login_submission.get("initial_device_display_name") + device_id, access_token = yield self.registration_handler.register_device( + user_id, device_id, initial_display_name, ) + result = { "user_id": user_id, # may have changed "access_token": access_token, @@ -263,7 +259,7 @@ class LoginRestServlet(ClientV1RestServlet): "device_id": device_id, } - defer.returnValue((200, result)) + defer.returnValue(result) @defer.inlineCallbacks def do_jwt_login(self, login_submission): @@ -292,11 +288,10 @@ class LoginRestServlet(ClientV1RestServlet): auth_handler = self.auth_handler registered_user_id = yield auth_handler.check_user_exists(user_id) if registered_user_id: - device_id = yield self._register_device( - registered_user_id, login_submission - ) - access_token = yield auth_handler.get_access_token_for_user_id( - registered_user_id, device_id, + device_id = login_submission.get("device_id") + initial_display_name = login_submission.get("initial_device_display_name") + device_id, access_token = yield self.registration_handler.register_device( + registered_user_id, device_id, 
initial_display_name, ) result = { @@ -305,90 +300,30 @@ class LoginRestServlet(ClientV1RestServlet): "home_server": self.hs.hostname, } else: - # TODO: we should probably check that the register isn't going - # to fonx/change our user_id before registering the device - device_id = yield self._register_device(user_id, login_submission) user_id, access_token = ( yield self.handlers.registration_handler.register(localpart=user) ) + + device_id = login_submission.get("device_id") + initial_display_name = login_submission.get("initial_device_display_name") + device_id, access_token = yield self.registration_handler.register_device( + registered_user_id, device_id, initial_display_name, + ) + result = { "user_id": user_id, # may have changed "access_token": access_token, "home_server": self.hs.hostname, } - defer.returnValue((200, result)) - - def _register_device(self, user_id, login_submission): - """Register a device for a user. - - This is called after the user's credentials have been validated, but - before the access token has been issued. 
- - Args: - (str) user_id: full canonical @user:id - (object) login_submission: dictionary supplied to /login call, from - which we pull device_id and initial_device_name - Returns: - defer.Deferred: (str) device_id - """ - device_id = login_submission.get("device_id") - initial_display_name = login_submission.get( - "initial_device_display_name") - return self.device_handler.check_device_registered( - user_id, device_id, initial_display_name - ) + defer.returnValue(result) -class SAML2RestServlet(ClientV1RestServlet): - PATTERNS = client_path_patterns("/login/saml2", releases=()) +class CasRedirectServlet(RestServlet): + PATTERNS = client_path_patterns("/login/(cas|sso)/redirect") def __init__(self, hs): - super(SAML2RestServlet, self).__init__(hs) - self.sp_config = hs.config.saml2_config_path - self.handlers = hs.get_handlers() - - @defer.inlineCallbacks - def on_POST(self, request): - saml2_auth = None - try: - conf = config.SPConfig() - conf.load_file(self.sp_config) - SP = Saml2Client(conf) - saml2_auth = SP.parse_authn_request_response( - request.args['SAMLResponse'][0], BINDING_HTTP_POST) - except Exception as e: # Not authenticated - logger.exception(e) - if saml2_auth and saml2_auth.status_ok() and not saml2_auth.not_signed: - username = saml2_auth.name_id.text - handler = self.handlers.registration_handler - (user_id, token) = yield handler.register_saml2(username) - # Forward to the RelayState callback along with ava - if 'RelayState' in request.args: - request.redirect(urllib.parse.unquote( - request.args['RelayState'][0]) + - '?status=authenticated&access_token=' + - token + '&user_id=' + user_id + '&ava=' + - urllib.quote(json.dumps(saml2_auth.ava))) - finish_request(request) - defer.returnValue(None) - defer.returnValue((200, {"status": "authenticated", - "user_id": user_id, "token": token, - "ava": saml2_auth.ava})) - elif 'RelayState' in request.args: - request.redirect(urllib.parse.unquote( - request.args['RelayState'][0]) + - 
'?status=not_authenticated') - finish_request(request) - defer.returnValue(None) - defer.returnValue((200, {"status": "not_authenticated"})) - - -class CasRedirectServlet(ClientV1RestServlet): - PATTERNS = client_path_patterns("/login/cas/redirect", releases=()) - - def __init__(self, hs): - super(CasRedirectServlet, self).__init__(hs) + super(CasRedirectServlet, self).__init__() self.cas_server_url = hs.config.cas_server_url.encode('ascii') self.cas_service_url = hs.config.cas_service_url.encode('ascii') @@ -416,17 +351,15 @@ class CasTicketServlet(ClientV1RestServlet): self.cas_server_url = hs.config.cas_server_url self.cas_service_url = hs.config.cas_service_url self.cas_required_attributes = hs.config.cas_required_attributes - self.auth_handler = hs.get_auth_handler() - self.handlers = hs.get_handlers() - self.macaroon_gen = hs.get_macaroon_generator() + self._sso_auth_handler = SSOAuthHandler(hs) @defer.inlineCallbacks def on_GET(self, request): - client_redirect_url = request.args[b"redirectUrl"][0] + client_redirect_url = parse_string(request, "redirectUrl", required=True) http_client = self.hs.get_simple_http_client() uri = self.cas_server_url + "/proxyValidate" args = { - "ticket": request.args[b"ticket"][0].decode('ascii'), + "ticket": parse_string(request, "ticket", required=True), "service": self.cas_service_url } try: @@ -438,7 +371,6 @@ class CasTicketServlet(ClientV1RestServlet): result = yield self.handle_cas_response(request, body, client_redirect_url) defer.returnValue(result) - @defer.inlineCallbacks def handle_cas_response(self, request, cas_response_body, client_redirect_url): user, attributes = self.parse_cas_response(cas_response_body) @@ -454,28 +386,9 @@ class CasTicketServlet(ClientV1RestServlet): if required_value != actual_value: raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED) - user_id = UserID(user, self.hs.hostname).to_string() - auth_handler = self.auth_handler - registered_user_id = yield 
auth_handler.check_user_exists(user_id) - if not registered_user_id: - registered_user_id, _ = ( - yield self.handlers.registration_handler.register(localpart=user) - ) - - login_token = self.macaroon_gen.generate_short_term_login_token( - registered_user_id + return self._sso_auth_handler.on_successful_auth( + user, request, client_redirect_url, ) - redirect_url = self.add_login_token_to_redirect_url(client_redirect_url, - login_token) - request.redirect(redirect_url) - finish_request(request) - - def add_login_token_to_redirect_url(self, url, token): - url_parts = list(urllib.parse.urlparse(url)) - query = dict(urllib.parse.parse_qsl(url_parts[4])) - query.update({"loginToken": token}) - url_parts[4] = urllib.parse.urlencode(query).encode('ascii') - return urllib.parse.urlunparse(url_parts) def parse_cas_response(self, cas_response_body): user = None @@ -510,10 +423,78 @@ class CasTicketServlet(ClientV1RestServlet): return user, attributes +class SSOAuthHandler(object): + """ + Utility class for Resources and Servlets which handle the response from a SSO + service + + Args: + hs (synapse.server.HomeServer) + """ + def __init__(self, hs): + self._hostname = hs.hostname + self._auth_handler = hs.get_auth_handler() + self._registration_handler = hs.get_registration_handler() + self._macaroon_gen = hs.get_macaroon_generator() + + @defer.inlineCallbacks + def on_successful_auth( + self, username, request, client_redirect_url, + user_display_name=None, + ): + """Called once the user has successfully authenticated with the SSO. + + Registers the user if necessary, and then returns a redirect (with + a login token) to the client. + + Args: + username (unicode|bytes): the remote user id. We'll map this onto + something sane for a MXID localpath. + + request (SynapseRequest): the incoming request from the browser. We'll + respond to it with a redirect. + + client_redirect_url (unicode): the redirect_url the client gave us when + it first started the process. 
+ + user_display_name (unicode|None): if set, and we have to register a new user, + we will set their displayname to this. + + Returns: + Deferred[none]: Completes once we have handled the request. + """ + localpart = map_username_to_mxid_localpart(username) + user_id = UserID(localpart, self._hostname).to_string() + registered_user_id = yield self._auth_handler.check_user_exists(user_id) + if not registered_user_id: + registered_user_id, _ = ( + yield self._registration_handler.register( + localpart=localpart, + generate_token=False, + default_display_name=user_display_name, + ) + ) + + login_token = self._macaroon_gen.generate_short_term_login_token( + registered_user_id + ) + redirect_url = self._add_login_token_to_redirect_url( + client_redirect_url, login_token + ) + request.redirect(redirect_url) + finish_request(request) + + @staticmethod + def _add_login_token_to_redirect_url(url, token): + url_parts = list(urllib.parse.urlparse(url)) + query = dict(urllib.parse.parse_qsl(url_parts[4])) + query.update({"loginToken": token}) + url_parts[4] = urllib.parse.urlencode(query) + return urllib.parse.urlunparse(url_parts) + + def register_servlets(hs, http_server): LoginRestServlet(hs).register(http_server) - if hs.config.saml2_enabled: - SAML2RestServlet(hs).register(http_server) if hs.config.cas_enabled: CasRedirectServlet(hs).register(http_server) CasTicketServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py index 9382b1f124..c654f9b5f0 100644 --- a/synapse/rest/client/v1/push_rule.py +++ b/synapse/rest/client/v1/push_rule.py @@ -42,7 +42,7 @@ class PushRuleRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_PUT(self, request): - spec = _rule_spec_from_path(request.postpath) + spec = _rule_spec_from_path([x.decode('utf8') for x in request.postpath]) try: priority_class = _priority_class_from_spec(spec) except InvalidRuleException as e: @@ -103,7 +103,7 @@ class 
PushRuleRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_DELETE(self, request): - spec = _rule_spec_from_path(request.postpath) + spec = _rule_spec_from_path([x.decode('utf8') for x in request.postpath]) requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() @@ -134,7 +134,7 @@ class PushRuleRestServlet(ClientV1RestServlet): rules = format_push_rules_for_user(requester.user, rules) - path = request.postpath[1:] + path = [x.decode('utf8') for x in request.postpath][1:] if path == []: # we're a reference impl: pedantry is our job. @@ -142,11 +142,10 @@ class PushRuleRestServlet(ClientV1RestServlet): PushRuleRestServlet.SLIGHTLY_PEDANTIC_TRAILING_SLASH_ERROR ) - if path[0] == b'': + if path[0] == '': defer.returnValue((200, rules)) - elif path[0] == b'global': - path = [x.decode('ascii') for x in path[1:]] - result = _filter_ruleset_with_path(rules['global'], path) + elif path[0] == 'global': + result = _filter_ruleset_with_path(rules['global'], path[1:]) defer.returnValue((200, result)) else: raise UnrecognizedRequestError() @@ -190,12 +189,24 @@ class PushRuleRestServlet(ClientV1RestServlet): def _rule_spec_from_path(path): + """Turn a sequence of path components into a rule spec + + Args: + path (sequence[unicode]): the URL path components. + + Returns: + dict: rule spec dict, containing scope/template/rule_id entries, + and possibly attr. + + Raises: + UnrecognizedRequestError if the path components cannot be parsed. 
+ """ if len(path) < 2: raise UnrecognizedRequestError() - if path[0] != b'pushrules': + if path[0] != 'pushrules': raise UnrecognizedRequestError() - scope = path[1].decode('ascii') + scope = path[1] path = path[2:] if scope != 'global': raise UnrecognizedRequestError() @@ -203,13 +214,13 @@ def _rule_spec_from_path(path): if len(path) == 0: raise UnrecognizedRequestError() - template = path[0].decode('ascii') + template = path[0] path = path[1:] if len(path) == 0 or len(path[0]) == 0: raise UnrecognizedRequestError() - rule_id = path[0].decode('ascii') + rule_id = path[0] spec = { 'scope': scope, @@ -220,7 +231,7 @@ def _rule_spec_from_path(path): path = path[1:] if len(path) > 0 and len(path[0]) > 0: - spec['attr'] = path[0].decode('ascii') + spec['attr'] = path[0] return spec diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py index b84f0260f2..4c07ae7f45 100644 --- a/synapse/rest/client/v1/pusher.py +++ b/synapse/rest/client/v1/pusher.py @@ -142,7 +142,7 @@ class PushersRemoveRestServlet(RestServlet): To allow pusher to be delete by clicking a link (ie. 
GET request) """ PATTERNS = client_path_patterns("/pushers/remove$") - SUCCESS_HTML = "<html><body>You have been unsubscribed</body><html>" + SUCCESS_HTML = b"<html><body>You have been unsubscribed</body><html>" def __init__(self, hs): super(PushersRemoveRestServlet, self).__init__() diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index fcfe7857f6..48da4d557f 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -89,7 +89,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() - self.event_creation_hander = hs.get_event_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() self.message_handler = hs.get_message_handler() @@ -172,7 +172,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): content=content, ) else: - event = yield self.event_creation_hander.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, @@ -189,7 +189,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) - self.event_creation_hander = hs.get_event_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] @@ -211,7 +211,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): if b'ts' in request.args and requester.app_service: event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) - event = yield self.event_creation_hander.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, diff --git a/synapse/rest/client/v1_only/__init__.py 
b/synapse/rest/client/v1_only/__init__.py deleted file mode 100644 index 936f902ace..0000000000 --- a/synapse/rest/client/v1_only/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -REST APIs that are only used in v1 (the legacy API). -""" diff --git a/synapse/rest/client/v1_only/base.py b/synapse/rest/client/v1_only/base.py deleted file mode 100644 index 9d4db7437c..0000000000 --- a/synapse/rest/client/v1_only/base.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This module contains base REST classes for constructing client v1 servlets. -""" - -import re - -from synapse.api.urls import CLIENT_PREFIX - - -def v1_only_client_path_patterns(path_regex, include_in_unstable=True): - """Creates a regex compiled client path with the correct client path - prefix. - - Args: - path_regex (str): The regex string to match. This should NOT have a ^ - as this will be prefixed. 
- Returns: - list of SRE_Pattern - """ - patterns = [re.compile("^" + CLIENT_PREFIX + path_regex)] - if include_in_unstable: - unstable_prefix = CLIENT_PREFIX.replace("/api/v1", "/unstable") - patterns.append(re.compile("^" + unstable_prefix + path_regex)) - return patterns diff --git a/synapse/rest/client/v1_only/register.py b/synapse/rest/client/v1_only/register.py deleted file mode 100644 index dadb376b02..0000000000 --- a/synapse/rest/client/v1_only/register.py +++ /dev/null @@ -1,392 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This module contains REST servlets to do with registration: /register""" -import hmac -import logging -from hashlib import sha1 - -from twisted.internet import defer - -import synapse.util.stringutils as stringutils -from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError -from synapse.config.server import is_threepid_reserved -from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request -from synapse.rest.client.v1.base import ClientV1RestServlet -from synapse.types import create_requester - -from .base import v1_only_client_path_patterns - -logger = logging.getLogger(__name__) - - -# We ought to be using hmac.compare_digest() but on older pythons it doesn't -# exist. 
It's a _really minor_ security flaw to use plain string comparison -# because the timing attack is so obscured by all the other code here it's -# unlikely to make much difference -if hasattr(hmac, "compare_digest"): - compare_digest = hmac.compare_digest -else: - def compare_digest(a, b): - return a == b - - -class RegisterRestServlet(ClientV1RestServlet): - """Handles registration with the home server. - - This servlet is in control of the registration flow; the registration - handler doesn't have a concept of multi-stages or sessions. - """ - - PATTERNS = v1_only_client_path_patterns("/register$", include_in_unstable=False) - - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ - super(RegisterRestServlet, self).__init__(hs) - # sessions are stored as: - # self.sessions = { - # "session_id" : { __session_dict__ } - # } - # TODO: persistent storage - self.sessions = {} - self.enable_registration = hs.config.enable_registration - self.auth = hs.get_auth() - self.auth_handler = hs.get_auth_handler() - self.handlers = hs.get_handlers() - - def on_GET(self, request): - - require_email = 'email' in self.hs.config.registrations_require_3pid - require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid - - flows = [] - if self.hs.config.enable_registration_captcha: - # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: - flows.extend([ - { - "type": LoginType.RECAPTCHA, - "stages": [ - LoginType.RECAPTCHA, - LoginType.EMAIL_IDENTITY, - LoginType.PASSWORD - ] - }, - ]) - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - flows.extend([ - { - "type": LoginType.RECAPTCHA, - "stages": [LoginType.RECAPTCHA, LoginType.PASSWORD] - } - ]) - else: - # only support the email-only flow if we don't require MSISDN 3PIDs - if require_email or not require_msisdn: - flows.extend([ - { - "type": LoginType.EMAIL_IDENTITY, - "stages": [ - 
LoginType.EMAIL_IDENTITY, LoginType.PASSWORD - ] - } - ]) - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - flows.extend([ - { - "type": LoginType.PASSWORD - } - ]) - return (200, {"flows": flows}) - - @defer.inlineCallbacks - def on_POST(self, request): - register_json = parse_json_object_from_request(request) - - session = (register_json["session"] - if "session" in register_json else None) - login_type = None - assert_params_in_dict(register_json, ["type"]) - - try: - login_type = register_json["type"] - - is_application_server = login_type == LoginType.APPLICATION_SERVICE - can_register = ( - self.enable_registration - or is_application_server - ) - if not can_register: - raise SynapseError(403, "Registration has been disabled") - - stages = { - LoginType.RECAPTCHA: self._do_recaptcha, - LoginType.PASSWORD: self._do_password, - LoginType.EMAIL_IDENTITY: self._do_email_identity, - LoginType.APPLICATION_SERVICE: self._do_app_service, - } - - session_info = self._get_session_info(request, session) - logger.debug("%s : session info %s request info %s", - login_type, session_info, register_json) - response = yield stages[login_type]( - request, - register_json, - session_info - ) - - if "access_token" not in response: - # isn't a final response - response["session"] = session_info["id"] - - defer.returnValue((200, response)) - except KeyError as e: - logger.exception(e) - raise SynapseError(400, "Missing JSON keys for login type %s." 
% ( - login_type, - )) - - def on_OPTIONS(self, request): - return (200, {}) - - def _get_session_info(self, request, session_id): - if not session_id: - # create a new session - while session_id is None or session_id in self.sessions: - session_id = stringutils.random_string(24) - self.sessions[session_id] = { - "id": session_id, - LoginType.EMAIL_IDENTITY: False, - LoginType.RECAPTCHA: False - } - - return self.sessions[session_id] - - def _save_session(self, session): - # TODO: Persistent storage - logger.debug("Saving session %s", session) - self.sessions[session["id"]] = session - - def _remove_session(self, session): - logger.debug("Removing session %s", session) - self.sessions.pop(session["id"]) - - @defer.inlineCallbacks - def _do_recaptcha(self, request, register_json, session): - if not self.hs.config.enable_registration_captcha: - raise SynapseError(400, "Captcha not required.") - - yield self._check_recaptcha(request, register_json, session) - - session[LoginType.RECAPTCHA] = True # mark captcha as done - self._save_session(session) - defer.returnValue({ - "next": [LoginType.PASSWORD, LoginType.EMAIL_IDENTITY] - }) - - @defer.inlineCallbacks - def _check_recaptcha(self, request, register_json, session): - if ("captcha_bypass_hmac" in register_json and - self.hs.config.captcha_bypass_secret): - if "user" not in register_json: - raise SynapseError(400, "Captcha bypass needs 'user'") - - want = hmac.new( - key=self.hs.config.captcha_bypass_secret, - msg=register_json["user"], - digestmod=sha1, - ).hexdigest() - - # str() because otherwise hmac complains that 'unicode' does not - # have the buffer interface - got = str(register_json["captcha_bypass_hmac"]) - - if compare_digest(want, got): - session["user"] = register_json["user"] - defer.returnValue(None) - else: - raise SynapseError( - 400, "Captcha bypass HMAC incorrect", - errcode=Codes.CAPTCHA_NEEDED - ) - - challenge = None - user_response = None - try: - challenge = register_json["challenge"] - 
user_response = register_json["response"] - except KeyError: - raise SynapseError(400, "Captcha response is required", - errcode=Codes.CAPTCHA_NEEDED) - - ip_addr = self.hs.get_ip_from_request(request) - - handler = self.handlers.registration_handler - yield handler.check_recaptcha( - ip_addr, - self.hs.config.recaptcha_private_key, - challenge, - user_response - ) - - @defer.inlineCallbacks - def _do_email_identity(self, request, register_json, session): - if (self.hs.config.enable_registration_captcha and - not session[LoginType.RECAPTCHA]): - raise SynapseError(400, "Captcha is required.") - - threepidCreds = register_json['threepidCreds'] - handler = self.handlers.registration_handler - logger.debug("Registering email. threepidcreds: %s" % (threepidCreds)) - yield handler.register_email(threepidCreds) - session["threepidCreds"] = threepidCreds # store creds for next stage - session[LoginType.EMAIL_IDENTITY] = True # mark email as done - self._save_session(session) - defer.returnValue({ - "next": LoginType.PASSWORD - }) - - @defer.inlineCallbacks - def _do_password(self, request, register_json, session): - if (self.hs.config.enable_registration_captcha and - not session[LoginType.RECAPTCHA]): - # captcha should've been done by this stage! 
- raise SynapseError(400, "Captcha is required.") - - if ("user" in session and "user" in register_json and - session["user"] != register_json["user"]): - raise SynapseError( - 400, "Cannot change user ID during registration" - ) - - password = register_json["password"].encode("utf-8") - desired_user_id = ( - register_json["user"].encode("utf-8") - if "user" in register_json else None - ) - threepid = None - if session.get(LoginType.EMAIL_IDENTITY): - threepid = session["threepidCreds"] - - handler = self.handlers.registration_handler - (user_id, token) = yield handler.register( - localpart=desired_user_id, - password=password, - threepid=threepid, - ) - # Necessary due to auth checks prior to the threepid being - # written to the db - if is_threepid_reserved(self.hs.config, threepid): - yield self.store.upsert_monthly_active_user(user_id) - - if session[LoginType.EMAIL_IDENTITY]: - logger.debug("Binding emails %s to %s" % ( - session["threepidCreds"], user_id) - ) - yield handler.bind_emails(user_id, session["threepidCreds"]) - - result = { - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - } - self._remove_session(session) - defer.returnValue(result) - - @defer.inlineCallbacks - def _do_app_service(self, request, register_json, session): - as_token = self.auth.get_access_token_from_request(request) - - assert_params_in_dict(register_json, ["user"]) - user_localpart = register_json["user"].encode("utf-8") - - handler = self.handlers.registration_handler - user_id = yield handler.appservice_register( - user_localpart, as_token - ) - token = yield self.auth_handler.issue_access_token(user_id) - self._remove_session(session) - defer.returnValue({ - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - }) - - -class CreateUserRestServlet(ClientV1RestServlet): - """Handles user creation via a server-to-server interface - """ - - PATTERNS = v1_only_client_path_patterns("/createUser$") - - def __init__(self, 
hs): - super(CreateUserRestServlet, self).__init__(hs) - self.store = hs.get_datastore() - self.handlers = hs.get_handlers() - - @defer.inlineCallbacks - def on_POST(self, request): - user_json = parse_json_object_from_request(request) - - access_token = self.auth.get_access_token_from_request(request) - app_service = self.store.get_app_service_by_token( - access_token - ) - if not app_service: - raise SynapseError(403, "Invalid application service token.") - - requester = create_requester(app_service.sender) - - logger.debug("creating user: %s", user_json) - response = yield self._do_create(requester, user_json) - - defer.returnValue((200, response)) - - def on_OPTIONS(self, request): - return 403, {} - - @defer.inlineCallbacks - def _do_create(self, requester, user_json): - assert_params_in_dict(user_json, ["localpart", "displayname"]) - - localpart = user_json["localpart"].encode("utf-8") - displayname = user_json["displayname"].encode("utf-8") - password_hash = user_json["password_hash"].encode("utf-8") \ - if user_json.get("password_hash") else None - - handler = self.handlers.registration_handler - user_id, token = yield handler.get_or_create_user( - requester=requester, - localpart=localpart, - displayname=displayname, - password_hash=password_hash - ) - - defer.returnValue({ - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - }) - - -def register_servlets(hs, http_server): - RegisterRestServlet(hs).register(http_server) - CreateUserRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/account_data.py b/synapse/rest/client/v2_alpha/account_data.py index 371e9aa354..f171b8d626 100644 --- a/synapse/rest/client/v2_alpha/account_data.py +++ b/synapse/rest/client/v2_alpha/account_data.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.errors import AuthError, SynapseError +from synapse.api.errors import AuthError, NotFoundError, SynapseError from synapse.http.servlet 
import RestServlet, parse_json_object_from_request from ._base import client_v2_patterns @@ -28,6 +28,7 @@ logger = logging.getLogger(__name__) class AccountDataServlet(RestServlet): """ PUT /user/{user_id}/account_data/{account_dataType} HTTP/1.1 + GET /user/{user_id}/account_data/{account_dataType} HTTP/1.1 """ PATTERNS = client_v2_patterns( "/user/(?P<user_id>[^/]*)/account_data/(?P<account_data_type>[^/]*)" @@ -57,10 +58,26 @@ class AccountDataServlet(RestServlet): defer.returnValue((200, {})) + @defer.inlineCallbacks + def on_GET(self, request, user_id, account_data_type): + requester = yield self.auth.get_user_by_req(request) + if user_id != requester.user.to_string(): + raise AuthError(403, "Cannot get account data for other users.") + + event = yield self.store.get_global_account_data_by_type_for_user( + account_data_type, user_id, + ) + + if event is None: + raise NotFoundError("Account data not found") + + defer.returnValue((200, event)) + class RoomAccountDataServlet(RestServlet): """ PUT /user/{user_id}/rooms/{room_id}/account_data/{account_dataType} HTTP/1.1 + GET /user/{user_id}/rooms/{room_id}/account_data/{account_dataType} HTTP/1.1 """ PATTERNS = client_v2_patterns( "/user/(?P<user_id>[^/]*)" @@ -99,6 +116,21 @@ class RoomAccountDataServlet(RestServlet): defer.returnValue((200, {})) + @defer.inlineCallbacks + def on_GET(self, request, user_id, room_id, account_data_type): + requester = yield self.auth.get_user_by_req(request) + if user_id != requester.user.to_string(): + raise AuthError(403, "Cannot get account data for other users.") + + event = yield self.store.get_account_data_for_room_and_type( + user_id, room_id, account_data_type, + ) + + if event is None: + raise NotFoundError("Room account data not found") + + defer.returnValue((200, event)) + def register_servlets(hs, http_server): AccountDataServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py index 
693b303881..ac035c7735 100644 --- a/synapse/rest/client/v2_alpha/auth.py +++ b/synapse/rest/client/v2_alpha/auth.py @@ -21,7 +21,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import SynapseError from synapse.api.urls import CLIENT_V2_ALPHA_PREFIX from synapse.http.server import finish_request -from synapse.http.servlet import RestServlet +from synapse.http.servlet import RestServlet, parse_string from ._base import client_v2_patterns @@ -33,7 +33,7 @@ RECAPTCHA_TEMPLATE = """ <title>Authentication</title> <meta name='viewport' content='width=device-width, initial-scale=1, user-scalable=no, minimum-scale=1.0, maximum-scale=1.0'> -<script src="https://www.google.com/recaptcha/api.js" +<script src="https://www.recaptcha.net/recaptcha/api.js" async defer></script> <script src="//code.jquery.com/jquery-1.11.2.min.js"></script> <link rel="stylesheet" href="/_matrix/static/client/register/style.css"> @@ -68,6 +68,29 @@ function captchaDone() { </html> """ +TERMS_TEMPLATE = """ +<html> +<head> +<title>Authentication</title> +<meta name='viewport' content='width=device-width, initial-scale=1, + user-scalable=no, minimum-scale=1.0, maximum-scale=1.0'> +<link rel="stylesheet" href="/_matrix/static/client/register/style.css"> +</head> +<body> +<form id="registrationForm" method="post" action="%(myurl)s"> + <div> + <p> + Please click the button below if you agree to the + <a href="%(terms_url)s">privacy policy of this homeserver.</a> + </p> + <input type="hidden" name="session" value="%(session)s" /> + <input type="submit" value="Agree" /> + </div> +</form> +</body> +</html> +""" + SUCCESS_TEMPLATE = """ <html> <head> @@ -106,18 +129,14 @@ class AuthRestServlet(RestServlet): self.hs = hs self.auth = hs.get_auth() self.auth_handler = hs.get_auth_handler() - self.registration_handler = hs.get_handlers().registration_handler + self.registration_handler = hs.get_registration_handler() - @defer.inlineCallbacks def on_GET(self, request, stagetype): - yield 
- if stagetype == LoginType.RECAPTCHA: - if ('session' not in request.args or - len(request.args['session']) == 0): - raise SynapseError(400, "No session supplied") - - session = request.args["session"][0] + session = parse_string(request, "session") + if not session: + raise SynapseError(400, "No session supplied") + if stagetype == LoginType.RECAPTCHA: html = RECAPTCHA_TEMPLATE % { 'session': session, 'myurl': "%s/auth/%s/fallback/web" % ( @@ -132,25 +151,44 @@ class AuthRestServlet(RestServlet): request.write(html_bytes) finish_request(request) - defer.returnValue(None) + return None + elif stagetype == LoginType.TERMS: + html = TERMS_TEMPLATE % { + 'session': session, + 'terms_url': "%s_matrix/consent?v=%s" % ( + self.hs.config.public_baseurl, + self.hs.config.user_consent_version, + ), + 'myurl': "%s/auth/%s/fallback/web" % ( + CLIENT_V2_ALPHA_PREFIX, LoginType.TERMS + ), + } + html_bytes = html.encode("utf8") + request.setResponseCode(200) + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%d" % (len(html_bytes),)) + + request.write(html_bytes) + finish_request(request) + return None else: raise SynapseError(404, "Unknown auth stage type") @defer.inlineCallbacks def on_POST(self, request, stagetype): - yield - if stagetype == "m.login.recaptcha": - if ('g-recaptcha-response' not in request.args or - len(request.args['g-recaptcha-response'])) == 0: - raise SynapseError(400, "No captcha response supplied") - if ('session' not in request.args or - len(request.args['session'])) == 0: - raise SynapseError(400, "No session supplied") - session = request.args['session'][0] + session = parse_string(request, "session") + if not session: + raise SynapseError(400, "No session supplied") + + if stagetype == LoginType.RECAPTCHA: + response = parse_string(request, "g-recaptcha-response") + + if not response: + raise SynapseError(400, "No captcha response supplied") authdict = { - 'response': 
request.args['g-recaptcha-response'][0], + 'response': response, 'session': session, } @@ -179,6 +217,41 @@ class AuthRestServlet(RestServlet): finish_request(request) defer.returnValue(None) + elif stagetype == LoginType.TERMS: + if ('session' not in request.args or + len(request.args['session'])) == 0: + raise SynapseError(400, "No session supplied") + + session = request.args['session'][0] + authdict = {'session': session} + + success = yield self.auth_handler.add_oob_auth( + LoginType.TERMS, + authdict, + self.hs.get_ip_from_request(request) + ) + + if success: + html = SUCCESS_TEMPLATE + else: + html = TERMS_TEMPLATE % { + 'session': session, + 'terms_url': "%s_matrix/consent?v=%s" % ( + self.hs.config.public_baseurl, + self.hs.config.user_consent_version, + ), + 'myurl': "%s/auth/%s/fallback/web" % ( + CLIENT_V2_ALPHA_PREFIX, LoginType.TERMS + ), + } + html_bytes = html.encode("utf8") + request.setResponseCode(200) + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%d" % (len(html_bytes),)) + + request.write(html_bytes) + finish_request(request) + defer.returnValue(None) else: raise SynapseError(404, "Unknown auth stage type") diff --git a/synapse/rest/client/v2_alpha/capabilities.py b/synapse/rest/client/v2_alpha/capabilities.py new file mode 100644 index 0000000000..373f95126e --- /dev/null +++ b/synapse/rest/client/v2_alpha/capabilities.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.api.constants import DEFAULT_ROOM_VERSION, RoomDisposition, RoomVersions +from synapse.http.servlet import RestServlet + +from ._base import client_v2_patterns + +logger = logging.getLogger(__name__) + + +class CapabilitiesRestServlet(RestServlet): + """End point to expose the capabilities of the server.""" + + PATTERNS = client_v2_patterns("/capabilities$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(CapabilitiesRestServlet, self).__init__() + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request): + requester = yield self.auth.get_user_by_req(request, allow_guest=True) + user = yield self.store.get_user_by_id(requester.user.to_string()) + change_password = bool(user["password_hash"]) + + response = { + "capabilities": { + "m.room_versions": { + "default": DEFAULT_ROOM_VERSION, + "available": { + RoomVersions.V1: RoomDisposition.STABLE, + RoomVersions.V2: RoomDisposition.STABLE, + RoomVersions.STATE_V2_TEST: RoomDisposition.UNSTABLE, + RoomVersions.V3: RoomDisposition.STABLE, + }, + }, + "m.change_password": {"enabled": change_password}, + } + } + defer.returnValue((200, response)) + + +def register_servlets(hs, http_server): + CapabilitiesRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 192f52e462..94cbba4303 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -145,7 +145,7 @@ class UsernameAvailabilityRestServlet(RestServlet): """ super(UsernameAvailabilityRestServlet, self).__init__() self.hs = hs - self.registration_handler = hs.get_handlers().registration_handler + self.registration_handler = 
hs.get_registration_handler() self.ratelimiter = FederationRateLimiter( hs.get_clock(), # Time window of 2s @@ -187,10 +187,9 @@ class RegisterRestServlet(RestServlet): self.auth = hs.get_auth() self.store = hs.get_datastore() self.auth_handler = hs.get_auth_handler() - self.registration_handler = hs.get_handlers().registration_handler + self.registration_handler = hs.get_registration_handler() self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() - self.device_handler = hs.get_device_handler() self.macaroon_gen = hs.get_macaroon_generator() @interactive_auth_handler @@ -309,22 +308,16 @@ class RegisterRestServlet(RestServlet): assigned_user_id=registered_user_id, ) - # Only give msisdn flows if the x_show_msisdn flag is given: - # this is a hack to work around the fact that clients were shipped - # that use fallback registration if they see any flows that they don't - # recognise, which means we break registration for these clients if we - # advertise msisdn flows. Once usage of Riot iOS <=0.3.9 and Riot - # Android <=0.6.9 have fallen below an acceptable threshold, this - # parameter should go away and we should always advertise msisdn flows. 
- show_msisdn = False - if 'x_show_msisdn' in body and body['x_show_msisdn']: - show_msisdn = True - # FIXME: need a better error than "no auth flow found" for scenarios # where we required 3PID for registration but the user didn't give one require_email = 'email' in self.hs.config.registrations_require_3pid require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid + show_msisdn = True + if self.hs.config.disable_msisdn_registration: + show_msisdn = False + require_msisdn = False + flows = [] if self.hs.config.enable_registration_captcha: # only support 3PIDless registration if no 3PIDs are required @@ -359,6 +352,13 @@ class RegisterRestServlet(RestServlet): [LoginType.MSISDN, LoginType.EMAIL_IDENTITY] ]) + # Append m.login.terms to all flows if we're requiring consent + if self.hs.config.user_consent_at_registration: + new_flows = [] + for flow in flows: + flow.append(LoginType.TERMS) + flows.extend(new_flows) + auth_result, params, session_id = yield self.auth_handler.check_auth( flows, body, self.hs.get_ip_from_request(request) ) @@ -389,8 +389,7 @@ class RegisterRestServlet(RestServlet): registered_user_id ) # don't re-register the threepids - add_email = False - add_msisdn = False + registered = False else: # NB: This may be from the auth handler and NOT from the POST assert_params_in_dict(params, ["password"]) @@ -415,8 +414,11 @@ class RegisterRestServlet(RestServlet): ) # Necessary due to auth checks prior to the threepid being # written to the db - if is_threepid_reserved(self.hs.config, threepid): - yield self.store.upsert_monthly_active_user(registered_user_id) + if threepid: + if is_threepid_reserved( + self.hs.config.mau_limits_reserved_threepids, threepid + ): + yield self.store.upsert_monthly_active_user(registered_user_id) # remember that we've now registered that user account, and with # what user ID (since the user may not have specified) @@ -424,25 +426,19 @@ class RegisterRestServlet(RestServlet): session_id, "registered_user_id", 
registered_user_id ) - add_email = True - add_msisdn = True + registered = True return_dict = yield self._create_registration_details( registered_user_id, params ) - if add_email and auth_result and LoginType.EMAIL_IDENTITY in auth_result: - threepid = auth_result[LoginType.EMAIL_IDENTITY] - yield self._register_email_threepid( - registered_user_id, threepid, return_dict["access_token"], - params.get("bind_email") - ) - - if add_msisdn and auth_result and LoginType.MSISDN in auth_result: - threepid = auth_result[LoginType.MSISDN] - yield self._register_msisdn_threepid( - registered_user_id, threepid, return_dict["access_token"], - params.get("bind_msisdn") + if registered: + yield self.registration_handler.post_registration_actions( + user_id=registered_user_id, + auth_result=auth_result, + access_token=return_dict.get("access_token"), + bind_email=params.get("bind_email"), + bind_msisdn=params.get("bind_msisdn"), ) defer.returnValue((200, return_dict)) @@ -496,115 +492,6 @@ class RegisterRestServlet(RestServlet): defer.returnValue(result) @defer.inlineCallbacks - def _register_email_threepid(self, user_id, threepid, token, bind_email): - """Add an email address as a 3pid identifier - - Also adds an email pusher for the email address, if configured in the - HS config - - Also optionally binds emails to the given user_id on the identity server - - Args: - user_id (str): id of user - threepid (object): m.login.email.identity auth response - token (str): access_token for the user - bind_email (bool): true if the client requested the email to be - bound at the identity server - Returns: - defer.Deferred: - """ - reqd = ('medium', 'address', 'validated_at') - if any(x not in threepid for x in reqd): - # This will only happen if the ID server returns a malformed response - logger.info("Can't add incomplete 3pid") - return - - yield self.auth_handler.add_threepid( - user_id, - threepid['medium'], - threepid['address'], - threepid['validated_at'], - ) - - # And we add an 
email pusher for them by default, but only - # if email notifications are enabled (so people don't start - # getting mail spam where they weren't before if email - # notifs are set up on a home server) - if (self.hs.config.email_enable_notifs and - self.hs.config.email_notif_for_new_users): - # Pull the ID of the access token back out of the db - # It would really make more sense for this to be passed - # up when the access token is saved, but that's quite an - # invasive change I'd rather do separately. - user_tuple = yield self.store.get_user_by_access_token( - token - ) - token_id = user_tuple["token_id"] - - yield self.hs.get_pusherpool().add_pusher( - user_id=user_id, - access_token=token_id, - kind="email", - app_id="m.email", - app_display_name="Email Notifications", - device_display_name=threepid["address"], - pushkey=threepid["address"], - lang=None, # We don't know a user's language here - data={}, - ) - - if bind_email: - logger.info("bind_email specified: binding") - logger.debug("Binding emails %s to %s" % ( - threepid, user_id - )) - yield self.identity_handler.bind_threepid( - threepid['threepid_creds'], user_id - ) - else: - logger.info("bind_email not specified: not binding email") - - @defer.inlineCallbacks - def _register_msisdn_threepid(self, user_id, threepid, token, bind_msisdn): - """Add a phone number as a 3pid identifier - - Also optionally binds msisdn to the given user_id on the identity server - - Args: - user_id (str): id of user - threepid (object): m.login.msisdn auth response - token (str): access_token for the user - bind_email (bool): true if the client requested the email to be - bound at the identity server - Returns: - defer.Deferred: - """ - try: - assert_params_in_dict(threepid, ['medium', 'address', 'validated_at']) - except SynapseError as ex: - if ex.errcode == Codes.MISSING_PARAM: - # This will only happen if the ID server returns a malformed response - logger.info("Can't add incomplete 3pid") - defer.returnValue(None) - 
raise - - yield self.auth_handler.add_threepid( - user_id, - threepid['medium'], - threepid['address'], - threepid['validated_at'], - ) - - if bind_msisdn: - logger.info("bind_msisdn specified: binding") - logger.debug("Binding msisdn %s to %s", threepid, user_id) - yield self.identity_handler.bind_threepid( - threepid['threepid_creds'], user_id - ) - else: - logger.info("bind_msisdn not specified: not binding msisdn") - - @defer.inlineCallbacks def _create_registration_details(self, user_id, params): """Complete registration of newly-registered user @@ -622,12 +509,10 @@ class RegisterRestServlet(RestServlet): "home_server": self.hs.hostname, } if not params.get("inhibit_login", False): - device_id = yield self._register_device(user_id, params) - - access_token = ( - yield self.auth_handler.get_access_token_for_user_id( - user_id, device_id=device_id, - ) + device_id = params.get("device_id") + initial_display_name = params.get("initial_device_display_name") + device_id, access_token = yield self.registration_handler.register_device( + user_id, device_id, initial_display_name, is_guest=False, ) result.update({ @@ -636,26 +521,6 @@ class RegisterRestServlet(RestServlet): }) defer.returnValue(result) - def _register_device(self, user_id, params): - """Register a device for a user. - - This is called after the user's credentials have been validated, but - before the access token has been issued. 
- - Args: - (str) user_id: full canonical @user:id - (object) params: registration parameters, from which we pull - device_id and initial_device_name - Returns: - defer.Deferred: (str) device_id - """ - # register the user's device - device_id = params.get("device_id") - initial_display_name = params.get("initial_device_display_name") - return self.device_handler.check_device_registered( - user_id, device_id, initial_display_name - ) - @defer.inlineCallbacks def _do_guest_registration(self, params): if not self.hs.config.allow_guest_access: @@ -669,13 +534,10 @@ class RegisterRestServlet(RestServlet): # we have nowhere to store it. device_id = synapse.api.auth.GUEST_DEVICE_ID initial_display_name = params.get("initial_device_display_name") - yield self.device_handler.check_device_registered( - user_id, device_id, initial_display_name + device_id, access_token = yield self.registration_handler.register_device( + user_id, device_id, initial_display_name, is_guest=True, ) - access_token = self.macaroon_gen.generate_access_token( - user_id, ["guest = true"] - ) defer.returnValue((200, { "user_id": user_id, "device_id": device_id, diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/v2_alpha/room_keys.py index 45b5817d8b..220a0de30b 100644 --- a/synapse/rest/client/v2_alpha/room_keys.py +++ b/synapse/rest/client/v2_alpha/room_keys.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, @@ -208,10 +208,25 @@ class RoomKeysServlet(RestServlet): user_id, version, room_id, session_id ) + # Convert room_keys to the right format to return. if session_id: - room_keys = room_keys['rooms'][room_id]['sessions'][session_id] + # If the client requests a specific session, but that session was + # not backed up, then return an M_NOT_FOUND. 
+ if room_keys['rooms'] == {}: + raise NotFoundError("No room_keys found") + else: + room_keys = room_keys['rooms'][room_id]['sessions'][session_id] elif room_id: - room_keys = room_keys['rooms'][room_id] + # If the client requests all sessions from a room, but no sessions + # are found, then return an empty result rather than an error, so + # that clients don't have to handle an error condition, and an + # empty result is valid. (Similarly if the client requests all + # sessions from the backup, but in that case, room_keys is already + # in the right format, so we don't need to do anything about it.) + if room_keys['rooms'] == {}: + room_keys = {'sessions': {}} + else: + room_keys = room_keys['rooms'][room_id] defer.returnValue((200, room_keys)) @@ -365,6 +380,40 @@ class RoomKeysVersionServlet(RestServlet): ) defer.returnValue((200, {})) + @defer.inlineCallbacks + def on_PUT(self, request, version): + """ + Update the information about a given version of the user's room_keys backup. + + POST /room_keys/version/12345 HTTP/1.1 + Content-Type: application/json + { + "algorithm": "m.megolm_backup.v1", + "auth_data": { + "public_key": "abcdefg", + "signatures": { + "ed25519:something": "hijklmnop" + } + }, + "version": "42" + } + + HTTP/1.1 200 OK + Content-Type: application/json + {} + """ + requester = yield self.auth.get_user_by_req(request, allow_guest=False) + user_id = requester.user.to_string() + info = parse_json_object_from_request(request) + + if version is None: + raise SynapseError(400, "No version specified to update", Codes.MISSING_PARAM) + + yield self.e2e_room_keys_handler.update_version( + user_id, version, info + ) + defer.returnValue((200, {})) + def register_servlets(hs, http_server): RoomKeysServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py b/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py new file mode 100644 index 0000000000..e6356101fd --- /dev/null +++ 
b/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.constants import KNOWN_ROOM_VERSIONS +from synapse.api.errors import Codes, SynapseError +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, +) + +from ._base import client_v2_patterns + +logger = logging.getLogger(__name__) + + +class RoomUpgradeRestServlet(RestServlet): + """Handler for room uprade requests. + + Handles requests of the form: + + POST /_matrix/client/r0/rooms/$roomid/upgrade HTTP/1.1 + Content-Type: application/json + + { + "new_version": "2", + } + + Creates a new room and shuts down the old one. Returns the ID of the new room. 
+ + Args: + hs (synapse.server.HomeServer): + """ + PATTERNS = client_v2_patterns( + # /rooms/$roomid/upgrade + "/rooms/(?P<room_id>[^/]*)/upgrade$", + v2_alpha=False, + ) + + def __init__(self, hs): + super(RoomUpgradeRestServlet, self).__init__() + self._hs = hs + self._room_creation_handler = hs.get_room_creation_handler() + self._auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request, room_id): + requester = yield self._auth.get_user_by_req(request) + + content = parse_json_object_from_request(request) + assert_params_in_dict(content, ("new_version", )) + new_version = content["new_version"] + + if new_version not in KNOWN_ROOM_VERSIONS: + raise SynapseError( + 400, + "Your homeserver does not support this room version", + Codes.UNSUPPORTED_ROOM_VERSION, + ) + + new_room_id = yield self._room_creation_handler.upgrade_room( + requester, room_id, new_version + ) + + ret = { + "replacement_room": new_room_id, + } + + defer.returnValue((200, ret)) + + +def register_servlets(hs, http_server): + RoomUpgradeRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 0251146722..39d157a44b 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -75,7 +75,7 @@ class SyncRestServlet(RestServlet): """ PATTERNS = client_v2_patterns("/sync$") - ALLOWED_PRESENCE = set(["online", "offline"]) + ALLOWED_PRESENCE = set(["online", "offline", "unavailable"]) def __init__(self, hs): super(SyncRestServlet, self).__init__() diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 29e62bfcdd..27e7cbf3cc 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -38,6 +38,7 @@ class VersionsRestServlet(RestServlet): "r0.1.0", "r0.2.0", "r0.3.0", + "r0.4.0", ], # as per MSC1497: "unstable_features": { diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py 
index 7362e1858d..6b371bfa2f 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -89,6 +89,7 @@ class ConsentResource(Resource): self.hs = hs self.store = hs.get_datastore() + self.registration_handler = hs.get_registration_handler() # this is required by the request_handler wrapper self.clock = hs.get_clock() @@ -100,16 +101,7 @@ class ConsentResource(Resource): "missing in config file.", ) - # daemonize changes the cwd to /, so make the path absolute now. - consent_template_directory = path.abspath( - hs.config.user_consent_template_dir, - ) - if not path.isdir(consent_template_directory): - raise ConfigError( - "Could not find template directory '%s'" % ( - consent_template_directory, - ), - ) + consent_template_directory = hs.config.user_consent_template_dir loader = jinja2.FileSystemLoader(consent_template_directory) self._jinja_env = jinja2.Environment( @@ -137,27 +129,36 @@ class ConsentResource(Resource): request (twisted.web.http.Request): """ - version = parse_string(request, "v", - default=self._default_consent_version) - username = parse_string(request, "u", required=True) - userhmac = parse_string(request, "h", required=True, encoding=None) + version = parse_string(request, "v", default=self._default_consent_version) + username = parse_string(request, "u", required=False, default="") + userhmac = None + has_consented = False + public_version = username == "" + if not public_version: + userhmac_bytes = parse_string(request, "h", required=True, encoding=None) - self._check_hash(username, userhmac) + self._check_hash(username, userhmac_bytes) - if username.startswith('@'): - qualified_user_id = username - else: - qualified_user_id = UserID(username, self.hs.hostname).to_string() + if username.startswith('@'): + qualified_user_id = username + else: + qualified_user_id = UserID(username, self.hs.hostname).to_string() - u = yield self.store.get_user_by_id(qualified_user_id) - if u is None: - raise 
NotFoundError("Unknown user") + u = yield self.store.get_user_by_id(qualified_user_id) + if u is None: + raise NotFoundError("Unknown user") + + has_consented = u["consent_version"] == version + userhmac = userhmac_bytes.decode("ascii") try: self._render_template( request, "%s.html" % (version,), - user=username, userhmac=userhmac, version=version, - has_consented=(u["consent_version"] == version), + user=username, + userhmac=userhmac, + version=version, + has_consented=has_consented, + public_version=public_version, ) except TemplateNotFound: raise NotFoundError("Unknown policy version") @@ -190,6 +191,7 @@ class ConsentResource(Resource): if e.code != 404: raise raise NotFoundError("Unknown user") + yield self.registration_handler.post_consent_actions(qualified_user_id) try: self._render_template(request, "success.html") @@ -223,7 +225,7 @@ class ConsentResource(Resource): key=self._hmac_secret, msg=userid.encode('utf-8'), digestmod=sha256, - ).hexdigest() + ).hexdigest().encode('ascii') if not compare_digest(want_mac, userhmac): raise SynapseError(http_client.FORBIDDEN, "HMAC incorrect") diff --git a/synapse/rest/key/v1/server_key_resource.py b/synapse/rest/key/v1/server_key_resource.py deleted file mode 100644 index 38eb2ee23f..0000000000 --- a/synapse/rest/key/v1/server_key_resource.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import logging - -from canonicaljson import encode_canonical_json -from signedjson.sign import sign_json -from unpaddedbase64 import encode_base64 - -from OpenSSL import crypto -from twisted.web.resource import Resource - -from synapse.http.server import respond_with_json_bytes - -logger = logging.getLogger(__name__) - - -class LocalKey(Resource): - """HTTP resource containing encoding the TLS X.509 certificate and NACL - signature verification keys for this server:: - - GET /key HTTP/1.1 - - HTTP/1.1 200 OK - Content-Type: application/json - { - "server_name": "this.server.example.com" - "verify_keys": { - "algorithm:version": # base64 encoded NACL verification key. - }, - "tls_certificate": # base64 ASN.1 DER encoded X.509 tls cert. - "signatures": { - "this.server.example.com": { - "algorithm:version": # NACL signature for this server. - } - } - } - """ - - def __init__(self, hs): - self.response_body = encode_canonical_json( - self.response_json_object(hs.config) - ) - Resource.__init__(self) - - @staticmethod - def response_json_object(server_config): - verify_keys = {} - for key in server_config.signing_key: - verify_key_bytes = key.verify_key.encode() - key_id = "%s:%s" % (key.alg, key.version) - verify_keys[key_id] = encode_base64(verify_key_bytes) - - x509_certificate_bytes = crypto.dump_certificate( - crypto.FILETYPE_ASN1, - server_config.tls_certificate - ) - json_object = { - u"server_name": server_config.server_name, - u"verify_keys": verify_keys, - u"tls_certificate": encode_base64(x509_certificate_bytes) - } - for key in server_config.signing_key: - json_object = sign_json( - json_object, - server_config.server_name, - key, - ) - - return json_object - - def render_GET(self, request): - return respond_with_json_bytes( - request, 200, self.response_body, - ) - - def getChild(self, name, request): - if name == b'': - return self diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 76e479afa3..d16a30acd8 100644 --- 
a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os +from six import PY3 from six.moves import urllib from twisted.internet import defer @@ -48,26 +49,21 @@ def parse_media_id(request): return server_name, media_id, file_name except Exception: raise SynapseError( - 404, - "Invalid media id token %r" % (request.postpath,), - Codes.UNKNOWN, + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN ) def respond_404(request): respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, ) @defer.inlineCallbacks -def respond_with_file(request, media_type, file_path, - file_size=None, upload_name=None): +def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None): logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): @@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name): file_size (int): Size in bytes of the media, if known. upload_name (str): The name of the requested file, if any. """ + def _quote(x): return urllib.parse.quote(x.encode("utf-8")) request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: if is_ascii(upload_name): - disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename=%s" % (_quote(upload_name),) else: - disposition = ( - "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) - request.setHeader(b"Content-Disposition", disposition) + request.setHeader(b"Content-Disposition", disposition.encode('ascii')) # cache for at least a day. 
# XXX: we might want to turn this off for data we don't want to # recommend caching as it's sensitive or private - or at least # select private. don't bother setting Expires as all our # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + request.setHeader(b"Content-Length", b"%d" % (file_size,)) @defer.inlineCallbacks @@ -142,8 +133,15 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam logger.debug("Responding to media request with responder %s") add_file_headers(request, media_type, file_size, upload_name) - with responder: - yield responder.write_to_consumer(request) + try: + with responder: + yield responder.write_to_consumer(request) + except Exception as e: + # The majority of the time this will be due to the client having gone + # away. Unfortunately, Twisted simply throws a generic exception at us + # in that case. + logger.warning("Failed to write to consumer: %s %s", type(e), e) + finish_request(request) @@ -153,6 +151,7 @@ class Responder(object): Responder is a context manager which *must* be used, so that any resources held can be cleaned up. """ + def write_to_consumer(self, consumer): """Stream response into consumer @@ -186,9 +185,18 @@ class FileInfo(object): thumbnail_method (str) thumbnail_type (str): Content type of thumbnail, e.g. 
image/png """ - def __init__(self, server_name, file_id, url_cache=False, - thumbnail=False, thumbnail_width=None, thumbnail_height=None, - thumbnail_method=None, thumbnail_type=None): + + def __init__( + self, + server_name, + file_id, + url_cache=False, + thumbnail=False, + thumbnail_width=None, + thumbnail_height=None, + thumbnail_method=None, + thumbnail_type=None, + ): self.server_name = server_name self.file_id = file_id self.url_cache = url_cache @@ -197,3 +205,74 @@ class FileInfo(object): self.thumbnail_height = thumbnail_height self.thumbnail_method = thumbnail_method self.thumbnail_type = thumbnail_type + + +def get_filename_from_headers(headers): + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers (twisted.web.http_headers.Headers): The HTTP + request headers. + + Returns: + A Unicode string of the filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b'']) + + # No header, bail out. + if not content_disposition[0]: + return + + # dict of unicode: bytes, corresponding to the key value sections of the + # Content-Disposition header. + params = {} + parts = content_disposition[0].split(b";") + for i in parts: + # Split into key-value pairs, if able + # We don't care about things like `inline`, so throw it out + if b"=" not in i: + continue + + key, value = i.strip().split(b"=") + params[key.decode('ascii')] = value + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + if PY3: + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. 
+ upload_name = urllib.parse.unquote( + upload_name_utf8.decode('ascii'), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + else: + # On Python 2, we first unquote the %-encoded parts and then + # decode it strictly using UTF-8. + try: + upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8') + except UnicodeDecodeError: + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + # Make sure there's no %-quoted bytes. If there is, reject it as + # non-valid ASCII. + if b"%" not in upload_name_ascii: + upload_name = upload_name_ascii.decode('ascii') + + # This may be None here, indicating we did not find a matching name. + return upload_name diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py index d6605b6027..77316033f7 100644 --- a/synapse/rest/media/v1/config_resource.py +++ b/synapse/rest/media/v1/config_resource.py @@ -41,7 +41,7 @@ class MediaConfigResource(Resource): @defer.inlineCallbacks def _async_render_GET(self, request): yield self.auth.get_user_by_req(request) - respond_with_json(request, 200, self.limits_dict) + respond_with_json(request, 200, self.limits_dict, send_cors=True) def render_OPTIONS(self, request): respond_with_json(request, 200, {}, send_cors=True) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index f911b120b1..bdc5daecc1 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -48,7 +48,8 @@ class DownloadResource(Resource): set_cors_headers(request) request.setHeader( b"Content-Security-Policy", - b"default-src 'none';" + b"sandbox;" + b" default-src 'none';" b" script-src 'none';" b" plugin-types application/pdf;" b" style-src 'unsafe-inline';" diff --git a/synapse/rest/media/v1/identicon_resource.py 
b/synapse/rest/media/v1/identicon_resource.py deleted file mode 100644 index bdbd8d50dd..0000000000 --- a/synapse/rest/media/v1/identicon_resource.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from pydenticon import Generator - -from twisted.web.resource import Resource - -from synapse.http.servlet import parse_integer - -FOREGROUND = [ - "rgb(45,79,255)", - "rgb(254,180,44)", - "rgb(226,121,234)", - "rgb(30,179,253)", - "rgb(232,77,65)", - "rgb(49,203,115)", - "rgb(141,69,170)" -] - -BACKGROUND = "rgb(224,224,224)" -SIZE = 5 - - -class IdenticonResource(Resource): - isLeaf = True - - def __init__(self): - Resource.__init__(self) - self.generator = Generator( - SIZE, SIZE, foreground=FOREGROUND, background=BACKGROUND, - ) - - def generate_identicon(self, name, width, height): - v_padding = width % SIZE - h_padding = height % SIZE - top_padding = v_padding // 2 - left_padding = h_padding // 2 - bottom_padding = v_padding - top_padding - right_padding = h_padding - left_padding - width -= v_padding - height -= h_padding - padding = (top_padding, bottom_padding, left_padding, right_padding) - identicon = self.generator.generate( - name, width, height, padding=padding - ) - return identicon - - def render_GET(self, request): - name = "/".join(request.postpath) - width = parse_integer(request, "width", default=96) - height = parse_integer(request, "height", default=96) - identicon_bytes = 
self.generate_identicon(name, width, height) - request.setHeader(b"Content-Type", b"image/png") - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - return identicon_bytes diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 08b1867fab..bdffa97805 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -14,14 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import errno import logging import os import shutil -from six import PY3, iteritems -from six.moves.urllib import parse as urlparse +from six import iteritems import twisted.internet.error import twisted.web.http @@ -32,20 +30,24 @@ from synapse.api.errors import ( FederationDeniedError, HttpResponseException, NotFoundError, + RequestSendFailed, SynapseError, ) -from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import logcontext from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string -from ._base import FileInfo, respond_404, respond_with_responder +from ._base import ( + FileInfo, + get_filename_from_headers, + respond_404, + respond_with_responder, +) from .config_resource import MediaConfigResource from .download_resource import DownloadResource from .filepath import MediaFilePaths -from .identicon_resource import IdenticonResource from .media_storage import MediaStorage from .preview_url_resource import PreviewUrlResource from .storage_provider import StorageProviderWrapper @@ -63,7 +65,7 @@ class MediaRepository(object): def __init__(self, hs): self.hs = hs self.auth = hs.get_auth() - self.client = 
MatrixFederationHttpClient(hs) + self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() @@ -371,10 +373,10 @@ class MediaRepository(object): "allow_remote": "false", } ) - except twisted.internet.error.DNSLookupError as e: - logger.warn("HTTP error fetching remote media %s/%s: %r", + except RequestSendFailed as e: + logger.warn("Request failed fetching remote media %s/%s: %r", server_name, media_id, e) - raise NotFoundError() + raise SynapseError(502, "Failed to fetch remote media") except HttpResponseException as e: logger.warn("HTTP error fetching remote media %s/%s: %s", @@ -398,39 +400,9 @@ class MediaRepository(object): yield finish() media_type = headers[b"Content-Type"][0].decode('ascii') - + upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0].decode('ascii'),) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. 
- if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - if PY3: - upload_name = urlparse.unquote(upload_name) - else: - upload_name = urlparse.unquote(upload_name.encode('ascii')) - try: - if isinstance(upload_name, bytes): - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - logger.info("Stored remote media in file %r", fname) yield self.store.store_cached_remote_media( @@ -769,7 +741,6 @@ class MediaRepositoryResource(Resource): self.putChild(b"thumbnail", ThumbnailResource( hs, media_repo, media_repo.media_storage, )) - self.putChild(b"identicon", IdenticonResource()) if hs.config.url_preview_enabled: self.putChild(b"preview_url", PreviewUrlResource( hs, media_repo, media_repo.media_storage, diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 1a7bfd6b56..ba3ab1d37d 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import cgi + import datetime import errno import fnmatch @@ -24,6 +24,7 @@ import shutil import sys import traceback +import six from six import string_types from six.moves import urllib_parse as urlparse @@ -34,7 +35,7 @@ from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET from synapse.api.errors import Codes, SynapseError -from synapse.http.client import SpiderHttpClient +from synapse.http.client import SimpleHttpClient from synapse.http.server import ( respond_with_json, respond_with_json_bytes, @@ -42,15 +43,19 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_integer, parse_string from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.media.v1._base import get_filename_from_headers from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string from ._base import FileInfo logger = logging.getLogger(__name__) +_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) +_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) + class PreviewUrlResource(Resource): isLeaf = True @@ -64,7 +69,12 @@ class PreviewUrlResource(Resource): self.max_spider_size = hs.config.max_spider_size self.server_name = hs.hostname self.store = hs.get_datastore() - self.client = SpiderHttpClient(hs) + self.client = SimpleHttpClient( + hs, + treq_args={"browser_like_redirects": True}, + ip_whitelist=hs.config.url_preview_ip_range_whitelist, + ip_blacklist=hs.config.url_preview_ip_range_blacklist, + ) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path self.media_storage = media_storage @@ -98,7 +108,7 @@ class PreviewUrlResource(Resource): # XXX: if 
get_user_by_req fails, what should we do in an async render? requester = yield self.auth.get_user_by_req(request) url = parse_string(request, "url") - if "ts" in request.args: + if b"ts" in request.args: ts = parse_integer(request, "ts") else: ts = self.clock.time_msec() @@ -180,7 +190,12 @@ class PreviewUrlResource(Resource): cache_result["expires_ts"] > ts and cache_result["response_code"] / 100 == 2 ): - defer.returnValue(cache_result["og"]) + # It may be stored as text in the database, not as bytes (such as + # PostgreSQL). If so, encode it back before handing it on. + og = cache_result["og"] + if isinstance(og, six.text_type): + og = og.encode('utf8') + defer.returnValue(og) return media_info = yield self._download_url(url, user) @@ -213,15 +228,28 @@ class PreviewUrlResource(Resource): elif _is_html(media_info['media_type']): # TODO: somehow stop a big HTML tree from exploding synapse's RAM - file = open(media_info['filename']) - body = file.read() - file.close() + with open(media_info['filename'], 'rb') as file: + body = file.read() - # clobber the encoding from the content-type, or default to utf-8 - # XXX: this overrides any <meta/> or XML charset headers in the body - # which may pose problems, but so far seems to work okay. - match = re.match(r'.*; *charset=(.*?)(;|$)', media_info['media_type'], re.I) - encoding = match.group(1) if match else "utf-8" + encoding = None + + # Let's try and figure out if it has an encoding set in a meta tag. + # Limit it to the first 1kb, since it ought to be in the meta tags + # at the top. + match = _charset_match.search(body[:1000]) + + # If we find a match, it should take precedence over the + # Content-Type header, so set it here. + if match: + encoding = match.group(1).decode('ascii') + + # If we don't find a match, we'll look at the HTTP Content-Type, and + # if that doesn't exist, we'll fall back to UTF-8. 
+ if not encoding: + match = _content_type_match.match( + media_info['media_type'] + ) + encoding = match.group(1) if match else "utf-8" og = decode_and_calc_og(body, media_info['uri'], encoding) @@ -295,6 +323,11 @@ class PreviewUrlResource(Resource): length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) + except SynapseError: + # Pass SynapseErrors through directly, so that the servlet + # handler will return a SynapseError to the client instead of + # blank data or a 500. + raise except Exception as e: # FIXME: pass through 404s and other error messages nicely logger.warn("Error downloading %s: %r", url, e) @@ -313,31 +346,7 @@ class PreviewUrlResource(Resource): media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - download_name = None - - # First check if there is a valid UTF-8 filename - download_name_utf8 = params.get("filename*", None) - if download_name_utf8: - if download_name_utf8.lower().startswith("utf-8''"): - download_name = download_name_utf8[7:] - - # If there isn't check for an ascii name. 
- if not download_name: - download_name_ascii = params.get("filename", None) - if download_name_ascii and is_ascii(download_name_ascii): - download_name = download_name_ascii - - if download_name: - download_name = urlparse.unquote(download_name) - try: - download_name = download_name.decode("utf-8") - except UnicodeDecodeError: - download_name = None - else: - download_name = None + download_name = get_filename_from_headers(headers) yield self.store.store_local_media( media_id=file_id, diff --git a/synapse/storage/schema/delta/34/sent_txn_purge.py b/synapse/rest/saml2/__init__.py index 0ffab10b6f..68da37ca6a 100644 --- a/synapse/storage/schema/delta/34/sent_txn_purge.py +++ b/synapse/rest/saml2/__init__.py @@ -1,4 +1,5 @@ -# Copyright 2016 OpenMarket Ltd +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import logging -from synapse.storage.engines import PostgresEngine - -logger = logging.getLogger(__name__) +from twisted.web.resource import Resource +from synapse.rest.saml2.metadata_resource import SAML2MetadataResource +from synapse.rest.saml2.response_resource import SAML2ResponseResource -def run_create(cur, database_engine, *args, **kwargs): - if isinstance(database_engine, PostgresEngine): - cur.execute("TRUNCATE sent_transactions") - else: - cur.execute("DELETE FROM sent_transactions") - - cur.execute("CREATE INDEX sent_transactions_ts ON sent_transactions(ts)") +logger = logging.getLogger(__name__) -def run_upgrade(cur, database_engine, *args, **kwargs): - pass +class SAML2Resource(Resource): + def __init__(self, hs): + Resource.__init__(self) + self.putChild(b"metadata.xml", SAML2MetadataResource(hs)) + self.putChild(b"authn_response", SAML2ResponseResource(hs)) diff --git a/synapse/rest/saml2/metadata_resource.py b/synapse/rest/saml2/metadata_resource.py new file mode 100644 index 0000000000..e8c680aeb4 --- /dev/null +++ b/synapse/rest/saml2/metadata_resource.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import saml2.metadata + +from twisted.web.resource import Resource + + +class SAML2MetadataResource(Resource): + """A Twisted web resource which renders the SAML metadata""" + + isLeaf = 1 + + def __init__(self, hs): + Resource.__init__(self) + self.sp_config = hs.config.saml2_sp_config + + def render_GET(self, request): + metadata_xml = saml2.metadata.create_metadata_string( + configfile=None, config=self.sp_config, + ) + request.setHeader(b"Content-Type", b"text/xml; charset=utf-8") + return metadata_xml diff --git a/synapse/rest/saml2/response_resource.py b/synapse/rest/saml2/response_resource.py new file mode 100644 index 0000000000..69fb77b322 --- /dev/null +++ b/synapse/rest/saml2/response_resource.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging + +import saml2 +from saml2.client import Saml2Client + +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET + +from synapse.api.errors import CodeMessageException +from synapse.http.server import wrap_html_request_handler +from synapse.http.servlet import parse_string +from synapse.rest.client.v1.login import SSOAuthHandler + +logger = logging.getLogger(__name__) + + +class SAML2ResponseResource(Resource): + """A Twisted web resource which handles the SAML response""" + + isLeaf = 1 + + def __init__(self, hs): + Resource.__init__(self) + + self._saml_client = Saml2Client(hs.config.saml2_sp_config) + self._sso_auth_handler = SSOAuthHandler(hs) + + def render_POST(self, request): + self._async_render_POST(request) + return NOT_DONE_YET + + @wrap_html_request_handler + def _async_render_POST(self, request): + resp_bytes = parse_string(request, 'SAMLResponse', required=True) + relay_state = parse_string(request, 'RelayState', required=True) + + try: + saml2_auth = self._saml_client.parse_authn_request_response( + resp_bytes, saml2.BINDING_HTTP_POST, + ) + except Exception as e: + logger.warning("Exception parsing SAML2 response", exc_info=1) + raise CodeMessageException( + 400, "Unable to parse SAML2 response: %s" % (e,), + ) + + if saml2_auth.not_signed: + raise CodeMessageException(400, "SAML2 response was not signed") + + if "uid" not in saml2_auth.ava: + raise CodeMessageException(400, "uid not in SAML2 response") + + username = saml2_auth.ava["uid"][0] + + displayName = saml2_auth.ava.get("displayName", [None])[0] + return self._sso_auth_handler.on_successful_auth( + username, request, relay_state, + user_display_name=displayName, + ) diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py new file mode 100644 index 0000000000..c0a4ae93e5 --- /dev/null +++ b/synapse/rest/well_known.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging + +from twisted.web.resource import Resource + +from synapse.http.server import set_cors_headers + +logger = logging.getLogger(__name__) + + +class WellKnownBuilder(object): + """Utility to construct the well-known response + + Args: + hs (synapse.server.HomeServer): + """ + def __init__(self, hs): + self._config = hs.config + + def get_well_known(self): + # if we don't have a public_base_url, we can't help much here. 
+ if self._config.public_baseurl is None: + return None + + result = { + "m.homeserver": { + "base_url": self._config.public_baseurl, + }, + } + + if self._config.default_identity_server: + result["m.identity_server"] = { + "base_url": self._config.default_identity_server, + } + + return result + + +class WellKnownResource(Resource): + """A Twisted web resource which renders the .well-known file""" + + isLeaf = 1 + + def __init__(self, hs): + Resource.__init__(self) + self._well_known_builder = WellKnownBuilder(hs) + + def render_GET(self, request): + set_cors_headers(request) + r = self._well_known_builder.get_well_known() + if not r: + request.setResponseCode(404) + request.setHeader(b"Content-Type", b"text/plain") + return b'.well-known not available' + + logger.error("returning: %s", r) + request.setHeader(b"Content-Type", b"application/json") + return json.dumps(r).encode("utf-8") diff --git a/synapse/server.py b/synapse/server.py index cf6b872cbd..4d364fccce 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -23,6 +23,7 @@ import abc import logging from twisted.enterprise import adbapi +from twisted.mail.smtp import sendmail from twisted.web.client import BrowserLikePolicyForHTTPS from synapse.api.auth import Auth @@ -30,6 +31,7 @@ from synapse.api.filtering import Filtering from synapse.api.ratelimiting import Ratelimiter from synapse.appservice.api import ApplicationServiceApi from synapse.appservice.scheduler import ApplicationServiceScheduler +from synapse.crypto import context_factory from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker @@ -45,6 +47,7 @@ from synapse.federation.transport.client import TransportLayerClient from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer from synapse.groups.groups_server import GroupsServerHandler from synapse.handlers import Handlers +from synapse.handlers.acme import AcmeHandler from 
synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator from synapse.handlers.deactivate_account import DeactivateAccountHandler @@ -61,6 +64,7 @@ from synapse.handlers.presence import PresenceHandler from synapse.handlers.profile import BaseProfileHandler, MasterProfileHandler from synapse.handlers.read_marker import ReadMarkerHandler from synapse.handlers.receipts import ReceiptsHandler +from synapse.handlers.register import RegistrationHandler from synapse.handlers.room import RoomContextHandler, RoomCreationHandler from synapse.handlers.room_list import RoomListHandler from synapse.handlers.room_member import RoomMemberMasterHandler @@ -110,6 +114,8 @@ class HomeServer(object): Attributes: config (synapse.config.homeserver.HomeserverConfig): + _listening_services (list[twisted.internet.tcp.Port]): TCP ports that + we are listening on to provide HTTP services. """ __metaclass__ = abc.ABCMeta @@ -128,6 +134,7 @@ class HomeServer(object): 'sync_handler', 'typing_handler', 'room_list_handler', + 'acme_handler', 'auth_handler', 'device_handler', 'e2e_keys_handler', @@ -174,6 +181,8 @@ class HomeServer(object): 'message_handler', 'pagination_handler', 'room_context_handler', + 'sendmail', + 'registration_handler', ] # This is overridden in derived application classes @@ -192,6 +201,7 @@ class HomeServer(object): self._reactor = reactor self.hostname = hostname self._building = {} + self._listening_services = [] self.clock = Clock(reactor) self.distributor = Distributor() @@ -269,6 +279,9 @@ class HomeServer(object): def build_room_creation_handler(self): return RoomCreationHandler(self) + def build_sendmail(self): + return sendmail + def build_state_handler(self): return StateHandler(self) @@ -305,6 +318,9 @@ class HomeServer(object): def build_e2e_room_keys_handler(self): return E2eRoomKeysHandler(self) + def build_acme_handler(self): + return AcmeHandler(self) + def 
build_application_service_api(self): return ApplicationServiceApi(self) @@ -345,10 +361,7 @@ class HomeServer(object): return Keyring(self) def build_event_builder_factory(self): - return EventBuilderFactory( - clock=self.get_clock(), - hostname=self.hostname, - ) + return EventBuilderFactory(self) def build_filtering(self): return Filtering(self) @@ -357,7 +370,10 @@ class HomeServer(object): return PusherPool(self) def build_http_client(self): - return MatrixFederationHttpClient(self) + tls_client_options_factory = context_factory.ClientTLSOptionsFactory( + self.config + ) + return MatrixFederationHttpClient(self, tls_client_options_factory) def build_db_pool(self): name = self.db_config["name"] @@ -467,6 +483,9 @@ class HomeServer(object): def build_room_context_handler(self): return RoomContextHandler(self) + def build_registration_handler(self): + return RegistrationHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/server.pyi b/synapse/server.pyi index ce28486233..06cd083a74 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -7,6 +7,9 @@ import synapse.handlers.auth import synapse.handlers.deactivate_account import synapse.handlers.device import synapse.handlers.e2e_keys +import synapse.handlers.room +import synapse.handlers.room_member +import synapse.handlers.message import synapse.handlers.set_password import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager @@ -50,6 +53,9 @@ class HomeServer(object): def get_room_creation_handler(self) -> synapse.handlers.room.RoomCreationHandler: pass + def get_room_member_handler(self) -> synapse.handlers.room_member.RoomMemberHandler: + pass + def get_event_creation_handler(self) -> synapse.handlers.message.EventCreationHandler: pass diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 9b40b18d5b..68058f613c 100644 --- 
a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -261,7 +261,7 @@ class StateHandler(object): logger.debug("calling resolve_state_groups from compute_event_context") entry = yield self.resolve_state_groups_for_events( - event.room_id, [e for e, _ in event.prev_events], + event.room_id, event.prev_event_ids(), ) prev_state_ids = entry.state @@ -607,9 +607,11 @@ def resolve_events_with_store(room_version, state_sets, event_map, state_res_sto return v1.resolve_events_with_store( state_sets, event_map, state_res_store.get_events, ) - elif room_version == RoomVersions.VDH_TEST: + elif room_version in ( + RoomVersions.STATE_V2_TEST, RoomVersions.V2, RoomVersions.V3, + ): return v2.resolve_events_with_store( - state_sets, event_map, state_res_store, + room_version, state_sets, event_map, state_res_store, ) else: # This should only happen if we added a version but forgot to add it to diff --git a/synapse/state/v1.py b/synapse/state/v1.py index 70a981f4a2..6d3afcae7c 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py @@ -21,7 +21,7 @@ from six import iteritems, iterkeys, itervalues from twisted.internet import defer from synapse import event_auth -from synapse.api.constants import EventTypes +from synapse.api.constants import EventTypes, RoomVersions from synapse.api.errors import AuthError logger = logging.getLogger(__name__) @@ -274,7 +274,11 @@ def _resolve_auth_events(events, auth_events): auth_events[(prev_event.type, prev_event.state_key)] = prev_event try: # The signatures have already been checked at this point - event_auth.check(event, auth_events, do_sig_check=False, do_size_check=False) + event_auth.check( + RoomVersions.V1, event, auth_events, + do_sig_check=False, + do_size_check=False, + ) prev_event = event except AuthError: return prev_event @@ -286,7 +290,11 @@ def _resolve_normal_events(events, auth_events): for event in _ordered_events(events): try: # The signatures have already been checked at this point - event_auth.check(event, 
auth_events, do_sig_check=False, do_size_check=False) + event_auth.check( + RoomVersions.V1, event, auth_events, + do_sig_check=False, + do_size_check=False, + ) return event except AuthError: pass @@ -298,6 +306,8 @@ def _resolve_normal_events(events, auth_events): def _ordered_events(events): def key_func(e): - return -int(e.depth), hashlib.sha1(e.event_id.encode('ascii')).hexdigest() + # we have to use utf-8 rather than ascii here because it turns out we allow + # people to send us events with non-ascii event IDs :/ + return -int(e.depth), hashlib.sha1(e.event_id.encode('utf-8')).hexdigest() return sorted(events, key=key_func) diff --git a/synapse/state/v2.py b/synapse/state/v2.py index 5d06f7e928..650995c92c 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -29,10 +29,12 @@ logger = logging.getLogger(__name__) @defer.inlineCallbacks -def resolve_events_with_store(state_sets, event_map, state_res_store): +def resolve_events_with_store(room_version, state_sets, event_map, state_res_store): """Resolves the state using the v2 state resolution algorithm Args: + room_version (str): The room version + state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. 
@@ -53,6 +55,10 @@ def resolve_events_with_store(state_sets, event_map, state_res_store): logger.debug("Computing conflicted state") + # We use event_map as a cache, so if its None we need to initialize it + if event_map is None: + event_map = {} + # First split up the un/conflicted state unconflicted_state, conflicted_state = _seperate(state_sets) @@ -100,7 +106,7 @@ def resolve_events_with_store(state_sets, event_map, state_res_store): # Now sequentially auth each one resolved_state = yield _iterative_auth_checks( - sorted_power_events, unconflicted_state, event_map, + room_version, sorted_power_events, unconflicted_state, event_map, state_res_store, ) @@ -125,7 +131,7 @@ def resolve_events_with_store(state_sets, event_map, state_res_store): logger.debug("resolving remaining events") resolved_state = yield _iterative_auth_checks( - leftover_events, resolved_state, event_map, + room_version, leftover_events, resolved_state, event_map, state_res_store, ) @@ -155,7 +161,7 @@ def _get_power_level_for_sender(event_id, event_map, state_res_store): event = yield _get_event(event_id, event_map, state_res_store) pl = None - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): aev = yield _get_event(aid, event_map, state_res_store) if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""): pl = aev @@ -163,7 +169,7 @@ def _get_power_level_for_sender(event_id, event_map, state_res_store): if pl is None: # Couldn't find power level. 
Check if they're the creator of the room - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): aev = yield _get_event(aid, event_map, state_res_store) if (aev.type, aev.state_key) == (EventTypes.Create, ""): if aev.content.get("creator") == event.sender: @@ -295,7 +301,7 @@ def _add_event_and_auth_chain_to_graph(graph, event_id, event_map, graph.setdefault(eid, set()) event = yield _get_event(eid, event_map, state_res_store) - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): if aid in auth_diff: if aid not in graph: state.append(aid) @@ -346,11 +352,13 @@ def _reverse_topological_power_sort(event_ids, event_map, state_res_store, auth_ @defer.inlineCallbacks -def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store): +def _iterative_auth_checks(room_version, event_ids, base_state, event_map, + state_res_store): """Sequentially apply auth checks to each event in given list, updating the state as it goes along. Args: + room_version (str) event_ids (list[str]): Ordered list of events to apply auth checks to base_state (dict[tuple[str, str], str]): The set of state to start with event_map (dict[str,FrozenEvent]) @@ -365,7 +373,7 @@ def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store): event = event_map[event_id] auth_events = {} - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): ev = yield _get_event(aid, event_map, state_res_store) if ev.rejected_reason is None: @@ -381,7 +389,7 @@ def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store): try: event_auth.check( - event, auth_events, + room_version, event, auth_events, do_sig_check=False, do_size_check=False ) @@ -413,9 +421,9 @@ def _mainline_sort(event_ids, resolved_power_event_id, event_map, while pl: mainline.append(pl) pl_ev = yield _get_event(pl, event_map, state_res_store) - auth_events = pl_ev.auth_events + auth_events = pl_ev.auth_event_ids() pl = None - for aid, _ in auth_events: + for 
aid in auth_events: ev = yield _get_event(aid, event_map, state_res_store) if (ev.type, ev.state_key) == (EventTypes.PowerLevels, ""): pl = aid @@ -460,10 +468,10 @@ def _get_mainline_depth_for_event(event, mainline_map, event_map, state_res_stor if depth is not None: defer.returnValue(depth) - auth_events = event.auth_events + auth_events = event.auth_event_ids() event = None - for aid, _ in auth_events: + for aid in auth_events: aev = yield _get_event(aid, event_map, state_res_store) if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""): event = aev diff --git a/synapse/static/client/login/index.html b/synapse/static/client/login/index.html index 96c8723cab..bcb6bc6bb7 100644 --- a/synapse/static/client/login/index.html +++ b/synapse/static/client/login/index.html @@ -12,35 +12,30 @@ <h1>Log in with one of the following methods</h1> <span id="feedback" style="color: #f00"></span> - <br/> - <br/> <div id="loading"> <img src="spinner.gif" /> </div> - <div id="cas_flow" class="login_flow" style="display:none" - onclick="gotoCas(); return false;"> - CAS Authentication: <button id="cas_button" style="margin: 10px">Log in</button> + <div id="sso_flow" class="login_flow" style="display:none"> + Single-sign on: + <form id="sso_form" action="/_matrix/client/r0/login/sso/redirect" method="get"> + <input id="sso_redirect_url" type="hidden" name="redirectUrl" value=""/> + <input type="submit" value="Log in"/> + </form> </div> - <br/> - - <form id="password_form" class="login_flow" style="display:none" - onsubmit="matrixLogin.password_login(); return false;"> - <div> - Password Authentication:<br/> - - <div style="text-align: center"> - <input id="user_id" size="32" type="text" placeholder="Matrix ID (e.g. 
bob)" autocapitalize="off" autocorrect="off" /> - <br/> - <input id="password" size="32" type="password" placeholder="Password"/> - <br/> + <div id="password_flow" class="login_flow" style="display:none"> + Password Authentication: + <form onsubmit="matrixLogin.password_login(); return false;"> + <input id="user_id" size="32" type="text" placeholder="Matrix ID (e.g. bob)" autocapitalize="off" autocorrect="off" /> + <br/> + <input id="password" size="32" type="password" placeholder="Password"/> + <br/> - <button type="submit" style="margin: 10px">Log in</button> - </div> - </div> - </form> + <input type="submit" value="Log in"/> + </form> + </div> <div id="no_login_types" type="button" class="login_flow" style="display:none"> Log in currently unavailable. diff --git a/synapse/static/client/login/js/login.js b/synapse/static/client/login/js/login.js index bfb7386035..3a958749a1 100644 --- a/synapse/static/client/login/js/login.js +++ b/synapse/static/client/login/js/login.js @@ -1,7 +1,8 @@ window.matrixLogin = { - endpoint: location.origin + "/_matrix/client/api/v1/login", + endpoint: location.origin + "/_matrix/client/r0/login", serverAcceptsPassword: false, - serverAcceptsCas: false + serverAcceptsCas: false, + serverAcceptsSso: false, }; var submitPassword = function(user, pwd) { @@ -40,12 +41,6 @@ var errorFunc = function(err) { } }; -var gotoCas = function() { - var this_page = window.location.origin + window.location.pathname; - var redirect_url = matrixLogin.endpoint + "/cas/redirect?redirectUrl=" + encodeURIComponent(this_page); - window.location.replace(redirect_url); -} - var setFeedbackString = function(text) { $("#feedback").text(text); }; @@ -53,12 +48,18 @@ var setFeedbackString = function(text) { var show_login = function() { $("#loading").hide(); + var this_page = window.location.origin + window.location.pathname; + $("#sso_redirect_url").val(encodeURIComponent(this_page)); + if (matrixLogin.serverAcceptsPassword) { - $("#password_form").show(); + 
$("#password_flow").show(); } - if (matrixLogin.serverAcceptsCas) { - $("#cas_flow").show(); + if (matrixLogin.serverAcceptsSso) { + $("#sso_flow").show(); + } else if (matrixLogin.serverAcceptsCas) { + $("#sso_form").attr("action", "/_matrix/client/r0/login/cas/redirect"); + $("#sso_flow").show(); } if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas) { @@ -67,8 +68,8 @@ var show_login = function() { }; var show_spinner = function() { - $("#password_form").hide(); - $("#cas_flow").hide(); + $("#password_flow").hide(); + $("#sso_flow").hide(); $("#no_login_types").hide(); $("#loading").show(); }; @@ -84,7 +85,10 @@ var fetch_info = function(cb) { matrixLogin.serverAcceptsCas = true; console.log("Server accepts CAS"); } - + if ("m.login.sso" === flow.type) { + matrixLogin.serverAcceptsSso = true; + console.log("Server accepts SSO"); + } if ("m.login.password" === flow.type) { matrixLogin.serverAcceptsPassword = true; console.log("Server accepts password"); diff --git a/synapse/static/client/login/style.css b/synapse/static/client/login/style.css index 73da0b5117..1cce5ed950 100644 --- a/synapse/static/client/login/style.css +++ b/synapse/static/client/login/style.css @@ -19,30 +19,23 @@ a:hover { color: #000; } a:active { color: #000; } input { - width: 90% -} - -textarea, input { - font-family: inherit; - font-size: inherit; margin: 5px; } -.smallPrint { - color: #888; - font-size: 9pt ! important; - font-style: italic ! 
important; +textbox, input[type="text"], input[type="password"] { + width: 90%; } -.g-recaptcha div { - margin: auto; +form { + text-align: center; + margin: 10px 0 0 0; } .login_flow { + width: 300px; text-align: left; padding: 10px; margin-bottom: 40px; - display: inline-block; -webkit-border-radius: 10px; -moz-border-radius: 10px; diff --git a/synapse/static/client/register/index.html b/synapse/static/client/register/index.html index 886f2edd1f..6edc4deb03 100644 --- a/synapse/static/client/register/index.html +++ b/synapse/static/client/register/index.html @@ -4,7 +4,7 @@ <meta name='viewport' content='width=device-width, initial-scale=1, user-scalable=no, minimum-scale=1.0, maximum-scale=1.0'> <link rel="stylesheet" href="style.css"> <script src="js/jquery-2.1.3.min.js"></script> -<script src="https://www.google.com/recaptcha/api/js/recaptcha_ajax.js"></script> +<script src="https://www.recaptcha.net/recaptcha/api/js/recaptcha_ajax.js"></script> <script src="register_config.js"></script> <script src="js/register.js"></script> </head> diff --git a/synapse/static/index.html b/synapse/static/index.html new file mode 100644 index 0000000000..d3f1c7dce0 --- /dev/null +++ b/synapse/static/index.html @@ -0,0 +1,63 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <title>Synapse is running</title> + <style> + body { + font-family: -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Oxygen-Sans,Ubuntu,Cantarell,"Helvetica Neue",sans-serif; + max-width: 40em; + margin: auto; + text-align: center; + } + h1, p { + margin: 1.5em; + } + hr { + border: none; + background-color: #ccc; + color: #ccc; + height: 1px; + width: 7em; + margin-top: 4em; + } + .logo { + display: block; + width: 12em; + margin: 4em auto; + } + </style> + </head> + <body> + <div class="logo"> + <svg role="img" aria-label="[Matrix logo]" viewBox="0 0 200 85" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> + <g id="parent" stroke="none" stroke-width="1" 
fill="none" fill-rule="evenodd"> + <g id="child" transform="translate(-122.000000, -6.000000)" fill="#000000" fill-rule="nonzero"> + <g id="matrix-logo" transform="translate(122.000000, 6.000000)"> + <polygon id="left-bracket" points="2.24708861 1.93811009 2.24708861 82.7268844 8.10278481 82.7268844 8.10278481 84.6652459 0 84.6652459 0 0 8.10278481 0 8.10278481 1.93811009"></polygon> + <path d="M24.8073418,27.5493174 L24.8073418,31.6376991 L24.924557,31.6376991 C26.0227848,30.0814294 27.3455696,28.8730642 28.8951899,28.0163743 C30.4437975,27.1611927 32.2189873,26.7318422 34.218481,26.7318422 C36.1394937,26.7318422 37.8946835,27.102622 39.4825316,27.8416679 C41.0708861,28.5819706 42.276962,29.8856073 43.1005063,31.7548404 C44.0017722,30.431345 45.2270886,29.2629486 46.7767089,28.2506569 C48.3253165,27.2388679 50.158481,26.7318422 52.2764557,26.7318422 C53.8843038,26.7318422 55.3736709,26.9269101 56.7473418,27.3162917 C58.1189873,27.7056734 59.295443,28.3285835 60.2759494,29.185022 C61.255443,30.0422147 62.02,31.1615927 62.5701266,32.5426532 C63.1187342,33.9262275 63.3936709,35.5898349 63.3936709,37.5372459 L63.3936709,57.7443688 L55.0410127,57.7441174 L55.0410127,40.6319376 C55.0410127,39.6201486 55.0020253,38.6661761 54.9232911,37.7700202 C54.8440506,36.8751211 54.6293671,36.0968606 54.2764557,35.4339817 C53.9232911,34.772611 53.403038,34.2464807 52.7177215,33.8568477 C52.0313924,33.4689743 51.0997468,33.2731523 49.9235443,33.2731523 C48.7473418,33.2731523 47.7962025,33.4983853 47.0706329,33.944578 C46.344557,34.393033 45.7764557,34.9774826 45.3650633,35.6969211 C44.9534177,36.4181193 44.6787342,37.2353431 44.5417722,38.150855 C44.4037975,39.0653615 44.3356962,39.9904257 44.3356962,40.9247908 L44.3356962,57.7443688 L35.9835443,57.7443688 L35.9835443,40.8079009 C35.9835443,39.9124991 35.963038,39.0263982 35.9253165,38.150855 C35.8853165,37.2743064 35.7192405,36.4666349 35.424557,35.7263321 C35.1303797,34.9872862 34.64,34.393033 33.9539241,33.944578 
C33.2675949,33.4983853 32.2579747,33.2731523 30.9248101,33.2731523 C30.5321519,33.2731523 30.0126582,33.3608826 29.3663291,33.5365945 C28.7192405,33.7118037 28.0913924,34.0433688 27.4840506,34.5292789 C26.875443,35.0164459 26.3564557,35.7172826 25.9250633,36.6315376 C25.4934177,37.5470495 25.2779747,38.7436 25.2779747,40.2229486 L25.2779747,57.7441174 L16.9260759,57.7443688 L16.9260759,27.5493174 L24.8073418,27.5493174 Z" id="m"></path> + <path d="M68.7455696,31.9886202 C69.6075949,30.7033339 70.7060759,29.672189 72.0397468,28.8926716 C73.3724051,28.1141596 74.8716456,27.5596239 76.5387342,27.2283101 C78.2050633,26.8977505 79.8817722,26.7315908 81.5678481,26.7315908 C83.0974684,26.7315908 84.6458228,26.8391798 86.2144304,27.0525982 C87.7827848,27.2675248 89.2144304,27.6865688 90.5086076,28.3087248 C91.8025316,28.9313835 92.8610127,29.7983798 93.6848101,30.9074514 C94.5083544,32.0170257 94.92,33.4870734 94.92,35.3173431 L94.92,51.026844 C94.92,52.3913138 94.998481,53.6941963 95.1556962,54.9400165 C95.3113924,56.1865908 95.5863291,57.120956 95.9787342,57.7436147 L87.5091139,57.7436147 C87.3518987,57.276055 87.2240506,56.7996972 87.1265823,56.3125303 C87.0278481,55.8266202 86.9592405,55.3301523 86.9207595,54.8236294 C85.5873418,56.1865908 84.0182278,57.1405633 82.2156962,57.6857982 C80.4113924,58.2295248 78.5683544,58.503022 76.6860759,58.503022 C75.2346835,58.503022 73.8817722,58.3275615 72.6270886,57.9776459 C71.3718987,57.6269761 70.2744304,57.082244 69.3334177,56.3411872 C68.3921519,55.602644 67.656962,54.6680275 67.1275949,53.5390972 C66.5982278,52.410167 66.3331646,51.065556 66.3331646,49.5087835 C66.3331646,47.7961578 66.6367089,46.384178 67.2455696,45.2756092 C67.8529114,44.1652807 68.6367089,43.2799339 69.5987342,42.6173064 C70.5589873,41.9556844 71.6567089,41.4592165 72.8924051,41.1284055 C74.1273418,40.7978459 75.3721519,40.5356606 76.6270886,40.3398385 C77.8820253,40.1457761 79.116962,39.9896716 80.3329114,39.873033 C81.5483544,39.7558917 
82.6270886,39.5804312 83.5681013,39.3469028 C84.5093671,39.1133743 85.2536709,38.7732624 85.8032911,38.3250587 C86.3513924,37.8773578 86.6063291,37.2252881 86.5678481,36.3680954 C86.5678481,35.4731963 86.4210127,34.7620532 86.1268354,34.2366771 C85.8329114,33.7113009 85.4405063,33.3018092 84.9506329,33.0099615 C84.4602532,32.7181138 83.8916456,32.5232972 83.2450633,32.4255119 C82.5977215,32.3294862 81.9010127,32.2797138 81.156962,32.2797138 C79.5098734,32.2797138 78.2159494,32.6303835 77.2746835,33.3312202 C76.3339241,34.0320569 75.7837975,35.2007046 75.6275949,36.8354037 L67.275443,36.8354037 C67.3924051,34.8892495 67.8817722,33.2726495 68.7455696,31.9886202 Z M85.2440506,43.6984752 C84.7149367,43.873433 84.1460759,44.0189798 83.5387342,44.1361211 C82.9306329,44.253011 82.2936709,44.350545 81.6270886,44.4279688 C80.96,44.5066495 80.2934177,44.6034294 79.6273418,44.7203193 C78.9994937,44.8362037 78.3820253,44.9933138 77.7749367,45.1871248 C77.1663291,45.3829468 76.636962,45.6451321 76.1865823,45.9759431 C75.7349367,46.3070055 75.3724051,46.7263009 75.0979747,47.2313156 C74.8232911,47.7375872 74.6863291,48.380356 74.6863291,49.1588679 C74.6863291,49.8979138 74.8232911,50.5218294 75.0979747,51.026844 C75.3724051,51.5338697 75.7455696,51.9328037 76.2159494,52.2246514 C76.6863291,52.5164991 77.2349367,52.7213706 77.8632911,52.8375064 C78.4898734,52.9546477 79.136962,53.012967 79.8037975,53.012967 C81.4506329,53.012967 82.724557,52.740978 83.6273418,52.1952404 C84.5288608,51.6507596 85.1949367,50.9981872 85.6270886,50.2382771 C86.0579747,49.4793725 86.323038,48.7119211 86.4212658,47.9321523 C86.518481,47.1536404 86.5681013,46.5304789 86.5681013,46.063422 L86.5681013,42.9677248 C86.2146835,43.2799339 85.7736709,43.5230147 85.2440506,43.6984752 Z" id="a"></path> + <path d="M116.917975,27.5493174 L116.917975,33.0976917 L110.801266,33.0976917 L110.801266,48.0492936 C110.801266,49.4502128 111.036203,50.3850807 111.507089,50.8518862 C111.976962,51.3191945 
112.918734,51.5527229 114.33038,51.5527229 C114.801013,51.5527229 115.251392,51.5336183 115.683038,51.4944037 C116.114177,51.4561945 116.526076,51.3968697 116.917975,51.3194459 L116.917975,57.7438661 C116.212152,57.860756 115.427595,57.9381798 114.565316,57.9778972 C113.702785,58.0153523 112.859747,58.0357138 112.036203,58.0357138 C110.742278,58.0357138 109.516456,57.9477321 108.36,57.7722716 C107.202785,57.5975651 106.183544,57.2577046 105.301519,56.7509303 C104.418987,56.2454128 103.722785,55.5242147 103.213418,54.5898495 C102.703038,53.6562385 102.448608,52.4292716 102.448608,50.9099541 L102.448608,33.0976917 L97.3903797,33.0976917 L97.3903797,27.5493174 L102.448608,27.5493174 L102.448608,18.4967596 L110.801013,18.4967596 L110.801013,27.5493174 L116.917975,27.5493174 Z" id="t"></path> + <path d="M128.857975,27.5493174 L128.857975,33.1565138 L128.975696,33.1565138 C129.367089,32.2213945 129.896203,31.3559064 130.563544,30.557033 C131.23038,29.7596679 131.99443,29.0776844 132.857215,28.5130936 C133.719241,27.9495083 134.641266,27.5113596 135.622532,27.1988991 C136.601772,26.8879468 137.622025,26.7315908 138.681013,26.7315908 C139.229873,26.7315908 139.836962,26.8296275 140.504304,27.0239413 L140.504304,34.7336477 C140.111646,34.6552183 139.641013,34.586844 139.092658,34.5290275 C138.543291,34.4704569 138.014177,34.4410459 137.504304,34.4410459 C135.974937,34.4410459 134.681013,34.6949358 133.622785,35.2004532 C132.564051,35.7067248 131.711392,36.397255 131.064051,37.2735523 C130.417215,38.1501009 129.955443,39.1714422 129.681266,40.3398385 C129.407089,41.5074807 129.269873,42.7736624 129.269873,44.1361211 L129.269873,57.7438661 L120.917722,57.7438661 L120.917722,27.5493174 L128.857975,27.5493174 Z" id="r"></path> + <path d="M144.033165,22.8767376 L144.033165,16.0435798 L152.386076,16.0435798 L152.386076,22.8767376 L144.033165,22.8767376 Z M152.386076,27.5493174 L152.386076,57.7438661 L144.033165,57.7438661 L144.033165,27.5493174 L152.386076,27.5493174 Z" 
id="i"></path> + <polygon id="x" points="156.738228 27.5493174 166.266582 27.5493174 171.619494 35.4337303 176.913418 27.5493174 186.147848 27.5493174 176.148861 41.6831927 187.383544 57.7441174 177.85443 57.7441174 171.501772 48.2245028 165.148861 57.7441174 155.797468 57.7441174 166.737468 41.8589046"></polygon> + <polygon id="right-bracket" points="197.580759 82.7268844 197.580759 1.93811009 191.725063 1.93811009 191.725063 0 199.828354 0 199.828354 84.6652459 191.725063 84.6652459 191.725063 82.7268844"></polygon> + </g> + </g> + </g> + </svg> + </div> + <h1>It works! Synapse is running</h1> + <p>Your Synapse server is listening on this port and is ready for messages.</p> + <p>To use this server you'll need <a href="https://matrix.org/docs/projects/try-matrix-now.html#clients" target="_blank">a Matrix client</a>. + </p> + <p>Welcome to the Matrix universe :)</p> + <hr> + <p> + <small> + <a href="https://matrix.org" target="_blank"> + matrix.org + </a> + </small> + </p> + </body> +</html> diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 53c685c173..42cd3c83ad 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,12 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import datetime +import calendar import logging import time -from dateutil import tz - from synapse.api.constants import PresenceState from synapse.storage.devices import DeviceStore from synapse.storage.user_erasure_store import UserErasureStore @@ -119,7 +117,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "device_lists_stream", "stream_id", ) - self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id") self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") @@ -320,7 +317,7 @@ class DataStore(RoomMemberStore, RoomStore, thirty_days_ago_in_secs)) for row in txn: - if row[0] is 'unknown': + if row[0] == 'unknown': pass results[row[0]] = row[1] @@ -358,10 +355,11 @@ class DataStore(RoomMemberStore, RoomStore, """ Returns millisecond unixtime for start of UTC day. """ - now = datetime.datetime.utcnow() - today_start = datetime.datetime(now.year, now.month, - now.day, tzinfo=tz.tzutc()) - return int(time.mktime(today_start.timetuple())) * 1000 + now = time.gmtime() + today_start = calendar.timegm(( + now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0, + )) + return today_start * 1000 def generate_user_daily_visits(self): """ diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d9d0255d0b..5a80eef211 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import itertools import logging import sys import threading @@ -26,9 +27,12 @@ from prometheus_client import Histogram from twisted.internet import defer from synapse.api.errors import StoreError -from synapse.storage.engines import PostgresEngine +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.types import get_domain_from_id from synapse.util.caches.descriptors import Cache from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.stringutils import exception_to_unicode logger = logging.getLogger(__name__) @@ -48,6 +52,25 @@ sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) +# Unique indexes which have been added in background updates. Maps from table name +# to the name of the background update which added the unique index to that table. +# +# This is used by the upsert logic to figure out which tables are safe to do a proper +# UPSERT on: until the relevant background update has completed, we +# have to emulate an upsert by locking the table. +# +UNIQUE_INDEX_BACKGROUND_UPDATES = { + "user_ips": "user_ips_device_unique_index", + "device_lists_remote_extremeties": "device_lists_remote_extremeties_unique_idx", + "device_lists_remote_cache": "device_lists_remote_cache_unique_idx", + "event_search": "event_search_event_id_idx", +} + +# This is a special cache name we use to batch multiple invalidations of caches +# based on the current state when notifying workers over replication. +_CURRENT_STATE_CACHE_NAME = "cs_cache_fake" + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. 
Adds logging and metrics to the .execute() @@ -83,6 +106,14 @@ class LoggingTransaction(object): def __iter__(self): return self.txn.__iter__() + def execute_batch(self, sql, args): + if isinstance(self.database_engine, PostgresEngine): + from psycopg2.extras import execute_batch + self._do_execute(lambda *x: execute_batch(self.txn, *x), sql, args) + else: + for val in args: + self.execute(sql, val) + def execute(self, sql, *args): self._do_execute(self.txn.execute, sql, *args) @@ -191,6 +222,57 @@ class SQLBaseStore(object): self.database_engine = hs.database_engine + # A set of tables that are not safe to use native upserts in. + self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) + + # We add the user_directory_search table to the blacklist on SQLite + # because the existing search table does not have an index, making it + # unsafe to use native upserts. + if isinstance(self.database_engine, Sqlite3Engine): + self._unsafe_to_upsert_tables.add("user_directory_search") + + if self.database_engine.can_native_upsert: + # Check ASAP (and then later, every 1s) to see if we have finished + # background updates of tables that aren't safe to update. + self._clock.call_later( + 0.0, + run_as_background_process, + "upsert_safety_check", + self._check_safe_to_upsert + ) + + @defer.inlineCallbacks + def _check_safe_to_upsert(self): + """ + Is it safe to use native UPSERT? + + If there are background updates, we will need to wait, as they may be + the addition of indexes that set the UNIQUE constraint that we require. + + If the background updates have not completed, wait 15 sec and check again. 
+ """ + updates = yield self._simple_select_list( + "background_updates", + keyvalues=None, + retcols=["update_name"], + desc="check_background_updates", + ) + updates = [x["update_name"] for x in updates] + + for table, update_name in UNIQUE_INDEX_BACKGROUND_UPDATES.items(): + if update_name not in updates: + logger.debug("Now safe to upsert in %s", table) + self._unsafe_to_upsert_tables.discard(table) + + # If there's any updates still running, reschedule to run. + if updates: + self._clock.call_later( + 15.0, + run_as_background_process, + "upsert_safety_check", + self._check_safe_to_upsert + ) + def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -249,32 +331,32 @@ class SQLBaseStore(object): except self.database_engine.module.OperationalError as e: # This can happen if the database disappears mid # transaction. - logger.warn( + logger.warning( "[TXN OPERROR] {%s} %s %d/%d", - name, e, i, N + name, exception_to_unicode(e), i, N ) if i < N: i += 1 try: conn.rollback() except self.database_engine.module.Error as e1: - logger.warn( + logger.warning( "[TXN EROLL] {%s} %s", - name, e1, + name, exception_to_unicode(e1), ) continue raise except self.database_engine.module.DatabaseError as e: if self.database_engine.is_deadlock(e): - logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N) + logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N) if i < N: i += 1 try: conn.rollback() except self.database_engine.module.Error as e1: - logger.warn( + logger.warning( "[TXN EROLL] {%s} %s", - name, e1, + name, exception_to_unicode(e1), ) continue raise @@ -493,8 +575,15 @@ class SQLBaseStore(object): txn.executemany(sql, vals) @defer.inlineCallbacks - def _simple_upsert(self, table, keyvalues, values, - insertion_values={}, desc="_simple_upsert", lock=True): + def _simple_upsert( + self, + table, + keyvalues, + values, + insertion_values={}, + desc="_simple_upsert", + lock=True + ): """ `lock` should generally be set to True (the default), but can be 
set @@ -515,16 +604,21 @@ class SQLBaseStore(object): inserting lock (bool): True to lock the table when doing the upsert. Returns: - Deferred(bool): True if a new entry was created, False if an - existing one was updated. + Deferred(None or bool): Native upserts always return None. Emulated + upserts return True if a new entry was created, False if an existing + one was updated. """ attempts = 0 while True: try: result = yield self.runInteraction( desc, - self._simple_upsert_txn, table, keyvalues, values, insertion_values, - lock=lock + self._simple_upsert_txn, + table, + keyvalues, + values, + insertion_values, + lock=lock, ) defer.returnValue(result) except self.database_engine.module.IntegrityError as e: @@ -536,30 +630,111 @@ class SQLBaseStore(object): # presumably we raced with another transaction: let's retry. logger.warn( - "IntegrityError when upserting into %s; retrying: %s", - table, e + "%s when upserting into %s; retrying: %s", e.__name__, table, e ) - def _simple_upsert_txn(self, txn, table, keyvalues, values, insertion_values={}, - lock=True): + def _simple_upsert_txn( + self, + txn, + table, + keyvalues, + values, + insertion_values={}, + lock=True, + ): + """ + Pick the UPSERT method which works best on the platform. Either the + native one (Pg9.5+, recent SQLites), or fall back to an emulated method. + + Args: + txn: The transaction to use. + table (str): The table to upsert into + keyvalues (dict): The unique key tables and their new values + values (dict): The nonunique columns and their new values + insertion_values (dict): additional key/values to use only when + inserting + lock (bool): True to lock the table when doing the upsert. + Returns: + None or bool: Native upserts always return None. Emulated + upserts return True if a new entry was created, False if an existing + one was updated. 
+ """ + if ( + self.database_engine.can_native_upsert + and table not in self._unsafe_to_upsert_tables + ): + return self._simple_upsert_txn_native_upsert( + txn, + table, + keyvalues, + values, + insertion_values=insertion_values, + ) + else: + return self._simple_upsert_txn_emulated( + txn, + table, + keyvalues, + values, + insertion_values=insertion_values, + lock=lock, + ) + + def _simple_upsert_txn_emulated( + self, txn, table, keyvalues, values, insertion_values={}, lock=True + ): + """ + Args: + table (str): The table to upsert into + keyvalues (dict): The unique key tables and their new values + values (dict): The nonunique columns and their new values + insertion_values (dict): additional key/values to use only when + inserting + lock (bool): True to lock the table when doing the upsert. + Returns: + bool: Return True if a new entry was created, False if an existing + one was updated. + """ # We need to lock the table :(, unless we're *really* careful if lock: self.database_engine.lock_table(txn, table) - # First try to update. - sql = "UPDATE %s SET %s WHERE %s" % ( - table, - ", ".join("%s = ?" % (k,) for k in values), - " AND ".join("%s = ?" % (k,) for k in keyvalues) - ) - sqlargs = list(values.values()) + list(keyvalues.values()) + def _getwhere(key): + # If the value we're passing in is None (aka NULL), we need to use + # IS, not =, as NULL = NULL equals NULL (False). + if keyvalues[key] is None: + return "%s IS ?" % (key,) + else: + return "%s = ?" % (key,) - txn.execute(sql, sqlargs) - if txn.rowcount > 0: - # successfully updated at least one row. - return False + if not values: + # If `values` is empty, then all of the values we care about are in + # the unique key, so there is nothing to UPDATE. We can just do a + # SELECT instead to see if it exists. 
+ sql = "SELECT 1 FROM %s WHERE %s" % ( + table, + " AND ".join(_getwhere(k) for k in keyvalues) + ) + sqlargs = list(keyvalues.values()) + txn.execute(sql, sqlargs) + if txn.fetchall(): + # We have an existing record. + return False + else: + # First try to update. + sql = "UPDATE %s SET %s WHERE %s" % ( + table, + ", ".join("%s = ?" % (k,) for k in values), + " AND ".join(_getwhere(k) for k in keyvalues) + ) + sqlargs = list(values.values()) + list(keyvalues.values()) - # We didn't update any rows so insert a new one + txn.execute(sql, sqlargs) + if txn.rowcount > 0: + # successfully updated at least one row. + return False + + # We didn't find any existing rows, so insert a new one allvalues = {} allvalues.update(keyvalues) allvalues.update(values) @@ -568,12 +743,144 @@ class SQLBaseStore(object): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, ", ".join(k for k in allvalues), - ", ".join("?" for _ in allvalues) + ", ".join("?" for _ in allvalues), ) txn.execute(sql, list(allvalues.values())) # successfully inserted return True + def _simple_upsert_txn_native_upsert( + self, txn, table, keyvalues, values, insertion_values={} + ): + """ + Use the native UPSERT functionality in recent PostgreSQL versions. + + Args: + table (str): The table to upsert into + keyvalues (dict): The unique key tables and their new values + values (dict): The nonunique columns and their new values + insertion_values (dict): additional key/values to use only when + inserting + Returns: + None + """ + allvalues = {} + allvalues.update(keyvalues) + allvalues.update(values) + allvalues.update(insertion_values) + + sql = ( + "INSERT INTO %s (%s) VALUES (%s) " + "ON CONFLICT (%s) DO UPDATE SET %s" + ) % ( + table, + ", ".join(k for k in allvalues), + ", ".join("?" for _ in allvalues), + ", ".join(k for k in keyvalues), + ", ".join(k + "=EXCLUDED." 
+ k for k in values), + ) + txn.execute(sql, list(allvalues.values())) + + def _simple_upsert_many_txn( + self, txn, table, key_names, key_values, value_names, value_values + ): + """ + Upsert, many times. + + Args: + table (str): The table to upsert into + key_names (list[str]): The key column names. + key_values (list[list]): A list of each row's key column values. + value_names (list[str]): The value column names. If empty, no + values will be used, even if value_values is provided. + value_values (list[list]): A list of each row's value column values. + Returns: + None + """ + if ( + self.database_engine.can_native_upsert + and table not in self._unsafe_to_upsert_tables + ): + return self._simple_upsert_many_txn_native_upsert( + txn, table, key_names, key_values, value_names, value_values + ) + else: + return self._simple_upsert_many_txn_emulated( + txn, table, key_names, key_values, value_names, value_values + ) + + def _simple_upsert_many_txn_emulated( + self, txn, table, key_names, key_values, value_names, value_values + ): + """ + Upsert, many times, but without native UPSERT support or batching. + + Args: + table (str): The table to upsert into + key_names (list[str]): The key column names. + key_values (list[list]): A list of each row's key column values. + value_names (list[str]): The value column names. If empty, no + values will be used, even if value_values is provided. + value_values (list[list]): A list of each row's value column values. + Returns: + None + """ + # No value columns, therefore make a blank list so that the following + # zip() works correctly. 
+ if not value_names: + value_values = [() for x in range(len(key_values))] + + for keyv, valv in zip(key_values, value_values): + _keys = {x: y for x, y in zip(key_names, keyv)} + _vals = {x: y for x, y in zip(value_names, valv)} + + self._simple_upsert_txn_emulated(txn, table, _keys, _vals) + + def _simple_upsert_many_txn_native_upsert( + self, txn, table, key_names, key_values, value_names, value_values + ): + """ + Upsert, many times, using batching where possible. + + Args: + table (str): The table to upsert into + key_names (list[str]): The key column names. + key_values (list[list]): A list of each row's key column values. + value_names (list[str]): The value column names. If empty, no + values will be used, even if value_values is provided. + value_values (list[list]): A list of each row's value column values. + Returns: + None + """ + allnames = [] + allnames.extend(key_names) + allnames.extend(value_names) + + if not value_names: + # No value columns, therefore make a blank list so that the + # following zip() works correctly. + latter = "NOTHING" + value_values = [() for x in range(len(key_values))] + else: + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in value_names) + ) + + sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s" % ( + table, + ", ".join(k for k in allnames), + ", ".join("?" 
for _ in allnames), + ", ".join(key_names), + latter, + ) + + args = [] + + for x, y in zip(key_values, value_values): + args.append(tuple(x) + tuple(y)) + + return txn.execute_batch(sql, args) + def _simple_select_one(self, table, keyvalues, retcols, allow_none=False, desc="_simple_select_one"): """Executes a SELECT query on the named table, which is expected to @@ -849,9 +1156,9 @@ class SQLBaseStore(object): rowcount = cls._simple_update_txn(txn, table, keyvalues, updatevalues) if rowcount == 0: - raise StoreError(404, "No row found") + raise StoreError(404, "No row found (%s)" % (table,)) if rowcount > 1: - raise StoreError(500, "More than one row matched") + raise StoreError(500, "More than one row matched (%s)" % (table,)) @staticmethod def _simple_select_one_txn(txn, table, keyvalues, retcols, @@ -868,9 +1175,9 @@ class SQLBaseStore(object): if not row: if allow_none: return None - raise StoreError(404, "No row found") + raise StoreError(404, "No row found (%s)" % (table,)) if txn.rowcount > 1: - raise StoreError(500, "More than one row matched") + raise StoreError(500, "More than one row matched (%s)" % (table,)) return dict(zip(retcols, row)) @@ -902,9 +1209,9 @@ class SQLBaseStore(object): txn.execute(sql, list(keyvalues.values())) if txn.rowcount == 0: - raise StoreError(404, "No row found") + raise StoreError(404, "No row found (%s)" % (table,)) if txn.rowcount > 1: - raise StoreError(500, "more than one row matched") + raise StoreError(500, "More than one row matched (%s)" % (table,)) def _simple_delete(self, table, keyvalues, desc): return self.runInteraction( @@ -1005,6 +1312,84 @@ class SQLBaseStore(object): be invalidated. """ txn.call_after(cache_func.invalidate, keys) + self._send_invalidation_to_replication(txn, cache_func.__name__, keys) + + def _invalidate_state_caches_and_stream(self, txn, room_id, members_changed): + """Special case invalidation of caches based on current state. 
+ + We special case this so that we can batch the cache invalidations into a + single replication poke. + + Args: + txn + room_id (str): Room where state changed + members_changed (iterable[str]): The user_ids of members that have changed + """ + txn.call_after(self._invalidate_state_caches, room_id, members_changed) + + keys = itertools.chain([room_id], members_changed) + self._send_invalidation_to_replication( + txn, _CURRENT_STATE_CACHE_NAME, keys, + ) + + def _invalidate_state_caches(self, room_id, members_changed): + """Invalidates caches that are based on the current state, but does + not stream invalidations down replication. + + Args: + room_id (str): Room where state changed + members_changed (iterable[str]): The user_ids of members that have + changed + """ + for member in members_changed: + self._attempt_to_invalidate_cache( + "get_rooms_for_user_with_stream_ordering", (member,), + ) + + for host in set(get_domain_from_id(u) for u in members_changed): + self._attempt_to_invalidate_cache( + "is_host_joined", (room_id, host,), + ) + self._attempt_to_invalidate_cache( + "was_host_joined", (room_id, host,), + ) + + self._attempt_to_invalidate_cache( + "get_users_in_room", (room_id,), + ) + self._attempt_to_invalidate_cache( + "get_room_summary", (room_id,), + ) + self._attempt_to_invalidate_cache( + "get_current_state_ids", (room_id,), + ) + + def _attempt_to_invalidate_cache(self, cache_name, key): + """Attempts to invalidate the cache of the given name, ignoring if the + cache doesn't exist. Mainly used for invalidating caches on workers, + where they may not have the cache. + + Args: + cache_name (str) + key (tuple) + """ + try: + getattr(self, cache_name).invalidate(key) + except AttributeError: + # We probably haven't pulled in the cache in this worker, + # which is fine. + pass + + def _send_invalidation_to_replication(self, txn, cache_name, keys): + """Notifies replication that given cache has been invalidated. 
+ + Note that this does *not* invalidate the cache locally. + + Args: + txn + cache_name (str) + keys (iterable[str]) + """ if isinstance(self.database_engine, PostgresEngine): # get_next() returns a context manager which is designed to wrap @@ -1022,7 +1407,7 @@ class SQLBaseStore(object): table="cache_invalidation_stream", values={ "stream_id": stream_id, - "cache_func": cache_func.__name__, + "cache_func": cache_name, "keys": list(keys), "invalidation_ts": self.clock.time_msec(), } diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 5fe1ca2de7..60cdc884e6 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -240,7 +240,7 @@ class BackgroundUpdateStore(SQLBaseStore): * An integer count of the number of items to update in this batch. The handler should return a deferred integer count of items updated. - The hander is responsible for updating the progress of the update. + The handler is responsible for updating the progress of the update. Args: update_name(str): The name of the update that this code handles. 
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 9ad17b7c25..9c21362226 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -65,7 +65,32 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["last_seen"], ) - # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) + self.register_background_update_handler( + "user_ips_analyze", + self._analyze_user_ip, + ) + + self.register_background_update_handler( + "user_ips_remove_dupes", + self._remove_user_ip_dupes, + ) + + # Register a unique index + self.register_background_index_update( + "user_ips_device_unique_index", + index_name="user_ips_user_token_ip_unique_index", + table="user_ips", + columns=["user_id", "access_token", "ip"], + unique=True, + ) + + # Drop the old non-unique index + self.register_background_update_handler( + "user_ips_drop_nonunique_index", + self._remove_user_ip_nonunique, + ) + + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) self._batch_row_update = {} self._client_ip_looper = self._clock.looping_call( @@ -76,6 +101,205 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) @defer.inlineCallbacks + def _remove_user_ip_nonunique(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute( + "DROP INDEX IF EXISTS user_ips_user_ip" + ) + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update("user_ips_drop_nonunique_index") + defer.returnValue(1) + + @defer.inlineCallbacks + def _analyze_user_ip(self, progress, batch_size): + # Background update to analyze user_ips table before we run the + # deduplication background update. The table may not have been analyzed + # for ages due to the table locks. 
+ # + # This will lock out the naive upserts to user_ips while it happens, but + # the analyze should be quick (28GB table takes ~10s) + def user_ips_analyze(txn): + txn.execute("ANALYZE user_ips") + + yield self.runInteraction( + "user_ips_analyze", user_ips_analyze + ) + + yield self._end_background_update("user_ips_analyze") + + defer.returnValue(1) + + @defer.inlineCallbacks + def _remove_user_ip_dupes(self, progress, batch_size): + # This works function works by scanning the user_ips table in batches + # based on `last_seen`. For each row in a batch it searches the rest of + # the table to see if there are any duplicates, if there are then they + # are removed and replaced with a suitable row. + + # Fetch the start of the batch + begin_last_seen = progress.get("last_seen", 0) + + def get_last_seen(txn): + txn.execute( + """ + SELECT last_seen FROM user_ips + WHERE last_seen > ? + ORDER BY last_seen + LIMIT 1 + OFFSET ? + """, + (begin_last_seen, batch_size) + ) + row = txn.fetchone() + if row: + return row[0] + else: + return None + + # Get a last seen that has roughly `batch_size` since `begin_last_seen` + end_last_seen = yield self.runInteraction( + "user_ips_dups_get_last_seen", get_last_seen + ) + + # If it returns None, then we're processing the last batch + last = end_last_seen is None + + logger.info( + "Scanning for duplicate 'user_ips' rows in range: %s <= last_seen < %s", + begin_last_seen, end_last_seen, + ) + + def remove(txn): + # This works by looking at all entries in the given time span, and + # then for each (user_id, access_token, ip) tuple in that range + # checking for any duplicates in the rest of the table (via a join). + # It then only returns entries which have duplicates, and the max + # last_seen across all duplicates, which can the be used to delete + # all other duplicates. + # It is efficient due to the existence of (user_id, access_token, + # ip) and (last_seen) indices. 
+ + # Define the search space, which requires handling the last batch in + # a different way + if last: + clause = "? <= last_seen" + args = (begin_last_seen,) + else: + clause = "? <= last_seen AND last_seen < ?" + args = (begin_last_seen, end_last_seen) + + # (Note: The DISTINCT in the inner query is important to ensure that + # the COUNT(*) is accurate, otherwise double counting may happen due + # to the join effectively being a cross product) + txn.execute( + """ + SELECT user_id, access_token, ip, + MAX(device_id), MAX(user_agent), MAX(last_seen), + COUNT(*) + FROM ( + SELECT DISTINCT user_id, access_token, ip + FROM user_ips + WHERE {} + ) c + INNER JOIN user_ips USING (user_id, access_token, ip) + GROUP BY user_id, access_token, ip + HAVING count(*) > 1 + """.format(clause), + args + ) + res = txn.fetchall() + + # We've got some duplicates + for i in res: + user_id, access_token, ip, device_id, user_agent, last_seen, count = i + + # We want to delete the duplicates so we end up with only a + # single row. + # + # The naive way of doing this would be just to delete all rows + # and reinsert a constructed row. However, if there are a lot of + # duplicate rows this can cause the table to grow a lot, which + # can be problematic in two ways: + # 1. If user_ips is already large then this can cause the + # table to rapidly grow, potentially filling the disk. + # 2. Reinserting a lot of rows can confuse the table + # statistics for postgres, causing it to not use the + # correct indices for the query above, resulting in a full + # table scan. This is incredibly slow for large tables and + # can kill database performance. (This seems to mainly + # happen for the last query where the clause is simply `? < + # last_seen`) + # + # So instead we want to delete all but *one* of the duplicate + # rows. That is hard to do reliably, so we cheat and do a two + # step process: + # 1. Delete all rows with a last_seen strictly less than the + # max last_seen. 
This hopefully results in deleting all but + # one row the majority of the time, but there may be + # duplicate last_seen + # 2. If multiple rows remain, we fall back to the naive method + # and simply delete all rows and reinsert. + # + # Note that this relies on no new duplicate rows being inserted, + # but if that is happening then this entire process is futile + # anyway. + + # Do step 1: + + txn.execute( + """ + DELETE FROM user_ips + WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ? + """, + (user_id, access_token, ip, last_seen) + ) + if txn.rowcount == count - 1: + # We deleted all but one of the duplicate rows, i.e. there + # is exactly one remaining and so there is nothing left to + # do. + continue + elif txn.rowcount >= count: + raise Exception( + "We deleted more duplicate rows from 'user_ips' than expected", + ) + + # The previous step didn't delete enough rows, so we fallback to + # step 2: + + # Drop all the duplicates + txn.execute( + """ + DELETE FROM user_ips + WHERE user_id = ? AND access_token = ? AND ip = ? + """, + (user_id, access_token, ip) + ) + + # Add in one to be the last_seen + txn.execute( + """ + INSERT INTO user_ips + (user_id, access_token, ip, device_id, user_agent, last_seen) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + (user_id, access_token, ip, device_id, user_agent, last_seen) + ) + + self._background_update_progress_txn( + txn, "user_ips_remove_dupes", {"last_seen": end_last_seen} + ) + + yield self.runInteraction("user_ips_dups_remove", remove) + + if last: + yield self._end_background_update("user_ips_remove_dupes") + + defer.returnValue(batch_size) + + @defer.inlineCallbacks def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id, now=None): if not now: @@ -114,7 +338,10 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) def _update_client_ips_batch_txn(self, txn, to_update): - self.database_engine.lock_table(txn, "user_ips") + if "user_ips" in self._unsafe_to_upsert_tables or ( + not self.database_engine.can_native_upsert + ): + self.database_engine.lock_table(txn, "user_ips") for entry in iteritems(to_update): (user_id, access_token, ip), (user_agent, device_id, last_seen) = entry @@ -127,10 +354,10 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_id": user_id, "access_token": access_token, "ip": ip, - "user_agent": user_agent, - "device_id": device_id, }, values={ + "user_agent": user_agent, + "device_id": device_id, "last_seen": last_seen, }, lock=False, @@ -227,7 +454,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): results = {} for key in self._batch_row_update: - uid, access_token, ip = key + uid, access_token, ip, = key if uid == user_id: user_agent, _, last_seen = self._batch_row_update[key] results[(access_token, ip)] = (user_agent, last_seen) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index d10ff9e4b9..ecdab34e7d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -22,14 +22,19 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.background_updates import BackgroundUpdateStore from 
synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList -from ._base import Cache, SQLBaseStore, db_to_json +from ._base import Cache, db_to_json logger = logging.getLogger(__name__) +DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( + "drop_device_list_streams_non_unique_indexes" +) -class DeviceStore(SQLBaseStore): + +class DeviceStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): super(DeviceStore, self).__init__(db_conn, hs) @@ -52,6 +57,30 @@ class DeviceStore(SQLBaseStore): columns=["user_id", "device_id"], ) + # create a unique index on device_lists_remote_cache + self.register_background_index_update( + "device_lists_remote_cache_unique_idx", + index_name="device_lists_remote_cache_unique_id", + table="device_lists_remote_cache", + columns=["user_id", "device_id"], + unique=True, + ) + + # And one on device_lists_remote_extremeties + self.register_background_index_update( + "device_lists_remote_extremeties_unique_idx", + index_name="device_lists_remote_extremeties_unique_idx", + table="device_lists_remote_extremeties", + columns=["user_id"], + unique=True, + ) + + # once they complete, we can remove the old non-unique indexes. + self.register_background_update_handler( + DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES, + self._drop_device_list_streams_non_unique_indexes, + ) + @defer.inlineCallbacks def store_device(self, user_id, device_id, initial_device_display_name): @@ -239,7 +268,19 @@ class DeviceStore(SQLBaseStore): def update_remote_device_list_cache_entry(self, user_id, device_id, content, stream_id): - """Updates a single user's device in the cache. + """Updates a single device in the cache of a remote user's devicelist. + + Note: assumes that we are the only thread that can be updating this user's + device list. 
+ + Args: + user_id (str): User to update device list for + device_id (str): ID of decivice being updated + content (dict): new data on this device + stream_id (int): the version of the device list + + Returns: + Deferred[None] """ return self.runInteraction( "update_remote_device_list_cache_entry", @@ -272,7 +313,11 @@ class DeviceStore(SQLBaseStore): }, values={ "content": json.dumps(content), - } + }, + + # we don't need to lock, because we assume we are the only thread + # updating this user's devices. + lock=False, ) txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,)) @@ -289,11 +334,26 @@ class DeviceStore(SQLBaseStore): }, values={ "stream_id": stream_id, - } + }, + + # again, we can assume we are the only thread updating this user's + # extremity. + lock=False, ) def update_remote_device_list_cache(self, user_id, devices, stream_id): - """Replace the cache of the remote user's devices. + """Replace the entire cache of the remote user's devices. + + Note: assumes that we are the only thread that can be updating this user's + device list. + + Args: + user_id (str): User to update device list for + devices (list[dict]): list of device objects supplied over federation + stream_id (int): the version of the device list + + Returns: + Deferred[None] """ return self.runInteraction( "update_remote_device_list_cache", @@ -338,7 +398,11 @@ class DeviceStore(SQLBaseStore): }, values={ "stream_id": stream_id, - } + }, + + # we don't need to lock, because we can assume we are the only thread + # updating this user's extremity. + lock=False, ) def get_devices_by_remote(self, destination, from_stream_id): @@ -589,10 +653,14 @@ class DeviceStore(SQLBaseStore): combined list of changes to devices, and which destinations need to be poked. `destination` may be None if no destinations need to be poked. """ + # We do a group by here as there can be a large number of duplicate + # entries, since we throw away device IDs. 
sql = """ - SELECT stream_id, user_id, destination FROM device_lists_stream + SELECT MAX(stream_id) AS stream_id, user_id, destination + FROM device_lists_stream LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) WHERE ? < stream_id AND stream_id <= ? + GROUP BY user_id, destination """ return self._execute( "get_all_device_list_changes_for_remotes", None, @@ -718,3 +786,19 @@ class DeviceStore(SQLBaseStore): "_prune_old_outbound_device_pokes", _prune_txn, ) + + @defer.inlineCallbacks + def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute( + "DROP INDEX IF EXISTS device_lists_remote_cache_id" + ) + txn.execute( + "DROP INDEX IF EXISTS device_lists_remote_extremeties_id" + ) + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) + defer.returnValue(1) diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index f25ded2295..9a3aec759e 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -118,6 +118,11 @@ class EndToEndRoomKeyStore(SQLBaseStore): these room keys. 
""" + try: + version = int(version) + except ValueError: + defer.returnValue({'rooms': {}}) + keyvalues = { "user_id": user_id, "version": version, @@ -177,7 +182,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): keyvalues = { "user_id": user_id, - "version": version, + "version": int(version), } if room_id: keyvalues['room_id'] = room_id @@ -212,14 +217,23 @@ class EndToEndRoomKeyStore(SQLBaseStore): Raises: StoreError: with code 404 if there are no e2e_room_keys_versions present Returns: - A deferred dict giving the info metadata for this backup version + A deferred dict giving the info metadata for this backup version, with + fields including: + version(str) + algorithm(str) + auth_data(object): opaque dict supplied by the client """ def _get_e2e_room_keys_version_info_txn(txn): if version is None: this_version = self._get_current_version(txn, user_id) else: - this_version = version + try: + this_version = int(version) + except ValueError: + # Our versions are all ints so if we can't convert it to an integer, + # it isn't there. 
+ raise StoreError(404, "No row found") result = self._simple_select_one_txn( txn, @@ -236,6 +250,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): ), ) result["auth_data"] = json.loads(result["auth_data"]) + result["version"] = str(result["version"]) return result return self.runInteraction( @@ -283,6 +298,27 @@ class EndToEndRoomKeyStore(SQLBaseStore): "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn ) + def update_e2e_room_keys_version(self, user_id, version, info): + """Update a given backup version + + Args: + user_id(str): the user whose backup version we're updating + version(str): the version ID of the backup version we're updating + info(dict): the new backup version info to store + """ + + return self._simple_update( + table="e2e_room_keys_versions", + keyvalues={ + "user_id": user_id, + "version": version, + }, + updatevalues={ + "auth_data": json.dumps(info["auth_data"]), + }, + desc="update_e2e_room_keys_version" + ) + def delete_e2e_room_keys_version(self, user_id, version=None): """Delete a given backup version of the user's room keys. Doesn't delete their actual key data. diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 1f1721e820..2a0f6cfca9 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -40,7 +40,10 @@ class EndToEndKeyStore(SQLBaseStore): allow_none=True, ) - new_key_json = encode_canonical_json(device_keys) + # In py3 we need old_key_json to match new_key_json type. The DB + # returns unicode while encode_canonical_json returns bytes. 
+ new_key_json = encode_canonical_json(device_keys).decode("utf-8") + if old_key_json == new_key_json: return False diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index e2f9de8451..ff5ef97ca8 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -18,7 +18,7 @@ import platform from ._base import IncorrectDatabaseSetup from .postgres import PostgresEngine -from .sqlite3 import Sqlite3Engine +from .sqlite import Sqlite3Engine SUPPORTED_MODULE = { "sqlite3": Sqlite3Engine, diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 42225f8a2a..4004427c7b 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -38,6 +38,13 @@ class PostgresEngine(object): return sql.replace("?", "%s") def on_new_connection(self, db_conn): + + # Get the version of PostgreSQL that we're using. As per the psycopg2 + # docs: The number is formed by converting the major, minor, and + # revision numbers into two-decimal-digit numbers and appending them + # together. For example, version 8.1.5 will be returned as 80105 + self._version = db_conn.server_version + db_conn.set_isolation_level( self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ ) @@ -54,6 +61,13 @@ class PostgresEngine(object): cursor.close() + @property + def can_native_upsert(self): + """ + Can we use native UPSERTs? This requires PostgreSQL 9.5+. 
+ """ + return self._version >= 90500 + def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite.py index 19949fc474..059ab81055 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite.py @@ -30,6 +30,14 @@ class Sqlite3Engine(object): self._current_state_group_id = None self._current_state_group_id_lock = threading.Lock() + @property + def can_native_upsert(self): + """ + Do we support native UPSERTs? This requires SQLite3 3.24+, plus some + more work we haven't done yet to tell what was inserted vs updated. + """ + return self.module.sqlite_version_info >= (3, 24, 0) + def check_database(self, txn): pass diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 3faca2a042..38809ed0fc 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -125,6 +125,29 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, return dict(txn) + @defer.inlineCallbacks + def get_max_depth_of(self, event_ids): + """Returns the max depth of a set of event IDs + + Args: + event_ids (list[str]) + + Returns + Deferred[int] + """ + rows = yield self._simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=("depth",), + desc="get_max_depth_of", + ) + + if not rows: + defer.returnValue(0) + else: + defer.returnValue(max(row["depth"] for row in rows)) + def _get_oldest_events_in_room_txn(self, txn, room_id): return self._simple_select_onecol_txn( txn, @@ -477,7 +500,7 @@ class EventFederationStore(EventFederationWorkerStore): "is_state": False, } for ev in events - for e_id, _ in ev.prev_events + for e_id in ev.prev_event_ids() ], ) @@ -510,7 +533,7 @@ class EventFederationStore(EventFederationWorkerStore): txn.executemany(query, [ (e_id, ev.room_id, e_id, 
ev.room_id, e_id, ev.room_id, False) - for ev in events for e_id, _ in ev.prev_events + for ev in events for e_id in ev.prev_event_ids() if not ev.internal_metadata.is_outlier() ]) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 8881b009df..06db9e56e6 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,6 +38,7 @@ from synapse.state import StateResolutionStore from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.events_worker import EventsWorkerStore +from synapse.storage.state import StateGroupWorkerStore from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util import batch_iter from synapse.util.async_helpers import ObservableDeferred @@ -205,7 +206,8 @@ def _retry_on_integrity_error(func): # inherits from EventFederationStore so that we can call _update_backward_extremities # and _handle_mult_prev_events (though arguably those could both be moved in here) -class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore): +class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore, + BackgroundUpdateStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" @@ -414,7 +416,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore ) if len_1: all_single_prev_not_state = all( - len(event.prev_events) == 1 + len(event.prev_event_ids()) == 1 and not event.is_state() for event, ctx in ev_ctx_rm ) @@ -438,7 +440,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore # guess this by looking at the prev_events and checking # if they match the current forward extremities. 
for ev, _ in ev_ctx_rm: - prev_event_ids = set(e for e, _ in ev.prev_events) + prev_event_ids = set(ev.prev_event_ids()) if latest_event_ids == prev_event_ids: state_delta_reuse_delta_counter.inc() break @@ -549,7 +551,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore result.difference_update( e_id for event in new_events - for e_id, _ in event.prev_events + for e_id in event.prev_event_ids() ) # Finally, remove any events which are prev_events of any existing events. @@ -737,7 +739,18 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore } events_map = {ev.event_id: ev for ev, _ in events_context} - room_version = yield self.get_room_version(room_id) + + # We need to get the room version, which is in the create event. + # Normally that'd be in the database, but its also possible that we're + # currently trying to persist it. + room_version = None + for ev, _ in events_context: + if ev.type == EventTypes.Create and ev.state_key == "": + room_version = ev.content.get("room_version", "1") + break + + if not room_version: + room_version = yield self.get_room_version(room_id) logger.debug("calling resolve_state_groups from preserve_events") res = yield self._state_resolution_handler.resolve_state_groups( @@ -867,7 +880,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore "auth_id": auth_id, } for event, _ in events_and_contexts - for auth_id, _ in event.auth_events + for auth_id in event.auth_event_ids() if event.is_state() ], ) @@ -891,105 +904,82 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in iteritems(state_delta_by_room): - to_delete, to_insert = current_state_tuple - - # First we add entries to the current_state_delta_stream. 
We - # do this before updating the current_state_events table so - # that we can use it to calculate the `prev_event_id`. (This - # allows us to not have to pull out the existing state - # unnecessarily). - sql = """ - INSERT INTO current_state_delta_stream - (stream_id, room_id, type, state_key, event_id, prev_event_id) - SELECT ?, ?, ?, ?, ?, ( - SELECT event_id FROM current_state_events - WHERE room_id = ? AND type = ? AND state_key = ? - ) - """ - txn.executemany(sql, ( - ( - max_stream_order, room_id, etype, state_key, None, - room_id, etype, state_key, - ) - for etype, state_key in to_delete - # We sanity check that we're deleting rather than updating - if (etype, state_key) not in to_insert - )) - txn.executemany(sql, ( - ( - max_stream_order, room_id, etype, state_key, ev_id, - room_id, etype, state_key, - ) - for (etype, state_key), ev_id in iteritems(to_insert) - )) - - # Now we actually update the current_state_events table + to_delete, to_insert = current_state_tuple - txn.executemany( - "DELETE FROM current_state_events" - " WHERE room_id = ? AND type = ? AND state_key = ?", - ( - (room_id, etype, state_key) - for etype, state_key in itertools.chain(to_delete, to_insert) - ), + # First we add entries to the current_state_delta_stream. We + # do this before updating the current_state_events table so + # that we can use it to calculate the `prev_event_id`. (This + # allows us to not have to pull out the existing state + # unnecessarily). + sql = """ + INSERT INTO current_state_delta_stream + (stream_id, room_id, type, state_key, event_id, prev_event_id) + SELECT ?, ?, ?, ?, ?, ( + SELECT event_id FROM current_state_events + WHERE room_id = ? AND type = ? AND state_key = ? 
) - - self._simple_insert_many_txn( - txn, - table="current_state_events", - values=[ - { - "event_id": ev_id, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - } - for key, ev_id in iteritems(to_insert) - ], + """ + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, None, + room_id, etype, state_key, ) - - txn.call_after( - self._curr_state_delta_stream_cache.entity_has_changed, - room_id, max_stream_order, + for etype, state_key in to_delete + # We sanity check that we're deleting rather than updating + if (etype, state_key) not in to_insert + )) + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, ev_id, + room_id, etype, state_key, ) + for (etype, state_key), ev_id in iteritems(to_insert) + )) - # Invalidate the various caches - - # Figure out the changes of membership to invalidate the - # `get_rooms_for_user` cache. - # We find out which membership events we may have deleted - # and which we have added, then we invlidate the caches for all - # those users. - members_changed = set( - state_key - for ev_type, state_key in itertools.chain(to_delete, to_insert) - if ev_type == EventTypes.Member - ) + # Now we actually update the current_state_events table - for member in members_changed: - self._invalidate_cache_and_stream( - txn, self.get_rooms_for_user_with_stream_ordering, (member,) - ) + txn.executemany( + "DELETE FROM current_state_events" + " WHERE room_id = ? AND type = ? 
AND state_key = ?", + ( + (room_id, etype, state_key) + for etype, state_key in itertools.chain(to_delete, to_insert) + ), + ) - for host in set(get_domain_from_id(u) for u in members_changed): - self._invalidate_cache_and_stream( - txn, self.is_host_joined, (room_id, host) - ) - self._invalidate_cache_and_stream( - txn, self.was_host_joined, (room_id, host) - ) + self._simple_insert_many_txn( + txn, + table="current_state_events", + values=[ + { + "event_id": ev_id, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + } + for key, ev_id in iteritems(to_insert) + ], + ) - self._invalidate_cache_and_stream( - txn, self.get_users_in_room, (room_id,) - ) + txn.call_after( + self._curr_state_delta_stream_cache.entity_has_changed, + room_id, max_stream_order, + ) - self._invalidate_cache_and_stream( - txn, self.get_room_summary, (room_id,) - ) + # Invalidate the various caches + + # Figure out the changes of membership to invalidate the + # `get_rooms_for_user` cache. + # We find out which membership events we may have deleted + # and which we have added, then we invlidate the caches for all + # those users. 
+ members_changed = set( + state_key + for ev_type, state_key in itertools.chain(to_delete, to_insert) + if ev_type == EventTypes.Member + ) - self._invalidate_cache_and_stream( - txn, self.get_current_state_ids, (room_id,) - ) + self._invalidate_state_caches_and_stream(txn, room_id, members_changed) def _update_forward_extremities_txn(self, txn, new_forward_extremities, max_stream_order): @@ -1255,6 +1245,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore event.internal_metadata.get_dict() ), "json": encode_json(event_dict(event)), + "format_version": event.format_version, } for event, _ in events_and_contexts ], @@ -2034,55 +2025,37 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] finding redundant state groups") - # Get all state groups that are only referenced by events that are - # to be deleted. - # This works by first getting state groups that we may want to delete, - # joining against event_to_state_groups to get events that use that - # state group, then left joining against events_to_purge again. Any - # state group where the left join produce *no nulls* are referenced - # only by events that are going to be purged. + # Get all state groups that are referenced by events that are to be + # deleted. We then go and check if they are referenced by other events + # or state groups, and if not we delete them. 
txn.execute(""" - SELECT state_group FROM - ( - SELECT DISTINCT state_group FROM events_to_purge - INNER JOIN event_to_state_groups USING (event_id) - ) AS sp - INNER JOIN event_to_state_groups USING (state_group) - LEFT JOIN events_to_purge AS ep USING (event_id) - GROUP BY state_group - HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0 + SELECT DISTINCT state_group FROM events_to_purge + INNER JOIN event_to_state_groups USING (event_id) """) - state_rows = txn.fetchall() - logger.info("[purge] found %i redundant state groups", len(state_rows)) - - # make a set of the redundant state groups, so that we can look them up - # efficiently - state_groups_to_delete = set([sg for sg, in state_rows]) - - # Now we get all the state groups that rely on these state groups - logger.info("[purge] finding state groups which depend on redundant" - " state groups") - remaining_state_groups = [] - for i in range(0, len(state_rows), 100): - chunk = [sg for sg, in state_rows[i:i + 100]] - # look for state groups whose prev_state_group is one we are about - # to delete - rows = self._simple_select_many_txn( - txn, - table="state_group_edges", - column="prev_state_group", - iterable=chunk, - retcols=["state_group"], - keyvalues={}, - ) - remaining_state_groups.extend( - row["state_group"] for row in rows + referenced_state_groups = set(sg for sg, in txn) + logger.info( + "[purge] found %i referenced state groups", + len(referenced_state_groups), + ) + + logger.info("[purge] finding state groups that can be deleted") - # exclude state groups we are about to delete: no point in - # updating them - if row["state_group"] not in state_groups_to_delete + state_groups_to_delete, remaining_state_groups = ( + self._find_unreferenced_groups_during_purge( + txn, referenced_state_groups, ) + ) + + logger.info( + "[purge] found %i state groups to delete", + len(state_groups_to_delete), + ) + + logger.info( + "[purge] de-delta-ing %i remaining state groups", + 
len(remaining_state_groups), + ) # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. @@ -2127,11 +2100,11 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", - state_rows + ((sg,) for sg in state_groups_to_delete), ) txn.executemany( "DELETE FROM state_groups WHERE id = ?", - state_rows + ((sg,) for sg in state_groups_to_delete), ) logger.info("[purge] removing events from event_to_state_groups") @@ -2227,6 +2200,85 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] done") + def _find_unreferenced_groups_during_purge(self, txn, state_groups): + """Used when purging history to figure out which state groups can be + deleted and which need to be de-delta'ed (due to one of its prev groups + being scheduled for deletion). + + Args: + txn + state_groups (set[int]): Set of state groups referenced by events + that are going to be deleted. + + Returns: + tuple[set[int], set[int]]: The set of state groups that can be + deleted and the set of state groups that need to be de-delta'ed + """ + # Graph of state group -> previous group + graph = {} + + # Set of events that we have found to be referenced by events + referenced_groups = set() + + # Set of state groups we've already seen + state_groups_seen = set(state_groups) + + # Set of state groups to handle next. 
+ next_to_search = set(state_groups) + while next_to_search: + # We bound size of groups we're looking up at once, to stop the + # SQL query getting too big + if len(next_to_search) < 100: + current_search = next_to_search + next_to_search = set() + else: + current_search = set(itertools.islice(next_to_search, 100)) + next_to_search -= current_search + + # Check if state groups are referenced + sql = """ + SELECT DISTINCT state_group FROM event_to_state_groups + LEFT JOIN events_to_purge AS ep USING (event_id) + WHERE state_group IN (%s) AND ep.event_id IS NULL + """ % (",".join("?" for _ in current_search),) + txn.execute(sql, list(current_search)) + + referenced = set(sg for sg, in txn) + referenced_groups |= referenced + + # We don't continue iterating up the state group graphs for state + # groups that are referenced. + current_search -= referenced + + rows = self._simple_select_many_txn( + txn, + table="state_group_edges", + column="prev_state_group", + iterable=current_search, + keyvalues={}, + retcols=("prev_state_group", "state_group",), + ) + + prevs = set(row["state_group"] for row in rows) + # We don't bother re-handling groups we've already seen + prevs -= state_groups_seen + next_to_search |= prevs + state_groups_seen |= prevs + + for row in rows: + # Note: Each state group can have at most one prev group + graph[row["state_group"]] = row["prev_state_group"] + + to_delete = state_groups_seen - referenced_groups + + to_dedelta = set() + for sg in referenced_groups: + prev_sg = graph.get(sg) + if prev_sg and prev_sg in to_delete: + to_dedelta.add(sg) + + return to_delete, to_dedelta + @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index a8326f5296..1716be529a 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -21,13 +21,14 @@ from canonicaljson import 
json from twisted.internet import defer +from synapse.api.constants import EventFormatVersions, EventTypes from synapse.api.errors import NotFoundError +from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 # these are only included to make the type annotations work -from synapse.events import EventBase # noqa: F401 -from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.types import get_domain_from_id from synapse.util.logcontext import ( LoggingContext, PreserveLoggingContext, @@ -160,9 +161,14 @@ class EventsWorkerStore(SQLBaseStore): log_ctx = LoggingContext.current_context() log_ctx.record_event_fetch(len(missing_events_ids)) + # Note that _enqueue_events is also responsible for turning db rows + # into FrozenEvents (via _get_event_from_row), which involves seeing if + # the events have been redacted, and if so pulling the redaction event out + # of the database to check it. + # + # _enqueue_events is a bit of a rubbish name but naming is hard. missing_events = yield self._enqueue_events( missing_events_ids, - check_redacted=check_redacted, allow_rejected=allow_rejected, ) @@ -174,6 +180,50 @@ class EventsWorkerStore(SQLBaseStore): if not entry: continue + # Starting in room version v3, some redactions need to be rechecked if we + # didn't have the redacted event at the time, so we recheck on read + # instead. + if not allow_rejected and entry.event.type == EventTypes.Redaction: + if entry.event.internal_metadata.need_to_check_redaction(): + # XXX: we need to avoid calling get_event here. + # + # The problem is that we end up at this point when an event + # which has been redacted is pulled out of the database by + # _enqueue_events, because _enqueue_events needs to check the + # redaction before it can cache the redacted event. 
So obviously, + # calling get_event to get the redacted event out of the database + # gives us an infinite loop. + # + # For now (quick hack to fix during 0.99 release cycle), we just + # go and fetch the relevant row from the db, but it would be nice + # to think about how we can cache this rather than hit the db + # every time we access a redaction event. + # + # One thought on how to do this: + # 1. split _get_events up so that it is divided into (a) get the + # rawish event from the db/cache, (b) do the redaction/rejection + # filtering + # 2. have _get_event_from_row just call the first half of that + + orig_sender = yield self._simple_select_one_onecol( + table="events", + keyvalues={"event_id": entry.event.redacts}, + retcol="sender", + allow_none=True, + ) + + expected_domain = get_domain_from_id(entry.event.sender) + if orig_sender and get_domain_from_id(orig_sender) == expected_domain: + # This redaction event is allowed. Mark as not needing a + # recheck. + entry.event.internal_metadata.recheck_redaction = False + else: + # We don't have the event that is being redacted, so we + # assume that the event isn't authorized for now. 
(If we + # later receive the event, then we will always redact + # it anyway, since we have this redaction) + continue + if allow_rejected or not entry.event.rejected_reason: if check_redacted and entry.redacted_event: event = entry.redacted_event @@ -197,7 +247,7 @@ class EventsWorkerStore(SQLBaseStore): defer.returnValue(events) def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) + self._get_event_cache.invalidate((event_id,)) def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): """Fetch events from the caches @@ -310,7 +360,7 @@ class EventsWorkerStore(SQLBaseStore): self.hs.get_reactor().callFromThread(fire, event_list, e) @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): + def _enqueue_events(self, events, allow_rejected=False): """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. 
@@ -353,6 +403,7 @@ class EventsWorkerStore(SQLBaseStore): self._get_event_from_row, row["internal_metadata"], row["json"], row["redacts"], rejected_reason=row["rejects"], + format_version=row["format_version"], ) for row in rows ], @@ -377,6 +428,7 @@ class EventsWorkerStore(SQLBaseStore): " e.event_id as event_id, " " e.internal_metadata," " e.json," + " e.format_version, " " r.redacts as redacts," " rej.event_id as rejects " " FROM event_json as e" @@ -392,7 +444,7 @@ class EventsWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): + format_version, rejected_reason=None): with Measure(self._clock, "_get_event_from_row"): d = json.loads(js) internal_metadata = json.loads(internal_metadata) @@ -405,8 +457,13 @@ class EventsWorkerStore(SQLBaseStore): desc="_get_event_from_row_rejected_reason", ) - original_ev = FrozenEvent( - d, + if format_version is None: + # This means that we stored the event before we had the concept + # of a event format version, so it must be a V1 event. + format_version = EventFormatVersions.V1 + + original_ev = event_type_from_format_version(format_version)( + event_dict=d, internal_metadata_dict=internal_metadata, rejected_reason=rejected_reason, ) @@ -436,6 +493,19 @@ class EventsWorkerStore(SQLBaseStore): # will serialise this field correctly redacted_event.unsigned["redacted_because"] = because + # Starting in room version v3, some redactions need to be + # rechecked if we didn't have the redacted event at the + # time, so we recheck on read instead. + if because.internal_metadata.need_to_check_redaction(): + expected_domain = get_domain_from_id(original_ev.sender) + if get_domain_from_id(because.sender) == expected_domain: + # This redaction event is allowed. Mark as not needing a + # recheck. 
+ because.internal_metadata.recheck_redaction = False + else: + # Senders don't match, so the event isn't actually redacted + redacted_event = None + cache_entry = _EventCacheEntry( event=original_ev, redacted_event=redacted_event, diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index cf4104dc2e..9e7e09b8c1 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -34,8 +34,9 @@ class MonthlyActiveUsersStore(SQLBaseStore): self.hs = hs self.reserved_users = () # Do not add more reserved users than the total allowable number - self._initialise_reserved_users( - dbconn.cursor(), + self._new_transaction( + dbconn, "initialise_mau_threepids", [], [], + self._initialise_reserved_users, hs.config.mau_limits_reserved_threepids[:self.hs.config.max_mau_value], ) @@ -54,9 +55,12 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn, tp["medium"], tp["address"] ) + if user_id: - self.upsert_monthly_active_user_txn(txn, user_id) - reserved_user_list.append(user_id) + is_support = self.is_support_user_txn(txn, user_id) + if not is_support: + self.upsert_monthly_active_user_txn(txn, user_id) + reserved_user_list.append(user_id) else: logger.warning( "mau limit reserved threepid %s not found in db" % tp @@ -96,37 +100,38 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn.execute(sql, query_args) - # If MAU user count still exceeds the MAU threshold, then delete on - # a least recently active basis. - # Note it is not possible to write this query using OFFSET due to - # incompatibilities in how sqlite and postgres support the feature. - # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present - # While Postgres does not require 'LIMIT', but also does not support - # negative LIMIT values. 
So there is no way to write it that both can - # support - safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) - # Must be greater than zero for postgres - safe_guard = safe_guard if safe_guard > 0 else 0 - query_args = [safe_guard] - - base_sql = """ - DELETE FROM monthly_active_users - WHERE user_id NOT IN ( - SELECT user_id FROM monthly_active_users - ORDER BY timestamp DESC - LIMIT ? + if self.hs.config.limit_usage_by_mau: + # If MAU user count still exceeds the MAU threshold, then delete on + # a least recently active basis. + # Note it is not possible to write this query using OFFSET due to + # incompatibilities in how sqlite and postgres support the feature. + # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present + # While Postgres does not require 'LIMIT', but also does not support + # negative LIMIT values. So there is no way to write it that both can + # support + safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) + # Must be greater than zero for postgres + safe_guard = safe_guard if safe_guard > 0 else 0 + query_args = [safe_guard] + + base_sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? + ) + """ + # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres + # when len(reserved_users) == 0. Works fine on sqlite. + if len(self.reserved_users) > 0: + query_args.extend(self.reserved_users) + sql = base_sql + """ AND user_id NOT IN ({})""".format( + ','.join(questionmarks) ) - """ - # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres - # when len(reserved_users) == 0. Works fine on sqlite. 
- if len(self.reserved_users) > 0: - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ','.join(questionmarks) - ) - else: - sql = base_sql - txn.execute(sql, query_args) + else: + sql = base_sql + txn.execute(sql, query_args) yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports @@ -180,15 +185,33 @@ class MonthlyActiveUsersStore(SQLBaseStore): Args: user_id (str): user to add/update """ - is_insert = yield self.runInteraction( + # Support user never to be included in MAU stats. Note I can't easily call this + # from upsert_monthly_active_user_txn because then I need a _txn form of + # is_support_user which is complicated because I want to cache the result. + # Therefore I call it here and ignore the case where + # upsert_monthly_active_user_txn is called directly from + # _initialise_reserved_users reasoning that it would be very strange to + # include a support user in this context. + + is_support = yield self.is_support_user(user_id) + if is_support: + return + + yield self.runInteraction( "upsert_monthly_active_user", self.upsert_monthly_active_user_txn, user_id ) - if is_insert: - self.user_last_seen_monthly_active.invalidate((user_id,)) + user_in_mau = self.user_last_seen_monthly_active.cache.get( + (user_id,), + None, + update_metrics=False + ) + if user_in_mau is None: self.get_monthly_active_count.invalidate(()) + self.user_last_seen_monthly_active.invalidate((user_id,)) + def upsert_monthly_active_user_txn(self, txn, user_id): """Updates or inserts monthly active user member @@ -198,6 +221,16 @@ class MonthlyActiveUsersStore(SQLBaseStore): in a database thread rather than the main thread, and we can't call txn.call_after because txn may not be a LoggingTransaction. + We consciously do not call is_support_txn from this method because it + is not possible to cache the response. 
is_support_txn will be false in + almost all cases, so it seems reasonable to call it only for + upsert_monthly_active_user and to call is_support_txn manually + for cases where upsert_monthly_active_user_txn is called directly, + like _initialise_reserved_users + + In short, don't call this method with support users. (Support users + should not appear in the MAU stats). + Args: txn (cursor): user_id (str): user to add/update @@ -206,6 +239,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): bool: True if a new entry was created, False if an existing one was updated. """ + # Am consciously deciding to lock the table on the basis that is ought # never be a big table and alternative approaches (batching multiple # upserts into a single txn) introduced a lot of extra complexity. @@ -252,8 +286,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): Args: user_id(str): the user_id to query """ - - if self.hs.config.limit_usage_by_mau: + if self.hs.config.limit_usage_by_mau or self.hs.config.mau_stats_only: # Trial users and guests should not be included as part of MAU group is_guest = yield self.is_guest(user_id) if is_guest: @@ -271,8 +304,14 @@ class MonthlyActiveUsersStore(SQLBaseStore): # but only update if we have not previously seen the user for # LAST_SEEN_GRANULARITY ms if last_seen_timestamp is None: - count = yield self.get_monthly_active_count() - if count < self.hs.config.max_mau_value: + # In the case where mau_stats_only is True and limit_usage_by_mau is + # False, there is no point in checking get_monthly_active_count - it + # adds no value and will break the logic if max_mau_value is exceeded. 
+ if not self.hs.config.limit_usage_by_mau: yield self.upsert_monthly_active_user(user_id) + else: + count = yield self.get_monthly_active_count() + if count < self.hs.config.max_mau_value: + yield self.upsert_monthly_active_user(user_id) elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY: yield self.upsert_monthly_active_user(user_id) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b364719312..fa36daac52 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 51 +SCHEMA_VERSION = 53 dir_path = os.path.abspath(os.path.dirname(__file__)) @@ -257,7 +257,7 @@ def _upgrade_existing_database(cur, current_version, applied_delta_files, module.run_create(cur, database_engine) if not is_empty: module.run_upgrade(cur, database_engine, config=config) - elif ext == ".pyc": + elif ext == ".pyc" or file_name == "__pycache__": # Sometimes .pyc files turn up anyway even though we've # disabled their generation; e.g. from distribution package # installers. 
Silently skip it diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 2743b52bad..134297e284 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -215,7 +215,7 @@ class PusherStore(PusherWorkerStore): with self._pushers_id_gen.get_next() as stream_id: # no need to lock because `pushers` has a unique key on # (app_id, pushkey, user_name) so _simple_upsert will retry - newly_inserted = yield self._simple_upsert( + yield self._simple_upsert( table="pushers", keyvalues={ "app_id": app_id, @@ -238,7 +238,12 @@ class PusherStore(PusherWorkerStore): lock=False, ) - if newly_inserted: + user_has_pusher = self.get_if_user_has_pusher.cache.get( + (user_id,), None, update_metrics=False + ) + + if user_has_pusher is not True: + # invalidate, since we the user might not have had a pusher before yield self.runInteraction( "add_pusher", self._invalidate_cache_and_stream, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 80d76bf9d7..9b9572890b 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -19,9 +19,11 @@ from six.moves import range from twisted.internet import defer +from synapse.api.constants import UserTypes from synapse.api.errors import Codes, StoreError from synapse.storage import background_updates from synapse.storage._base import SQLBaseStore +from synapse.types import UserID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -112,6 +114,187 @@ class RegistrationWorkerStore(SQLBaseStore): return None + @cachedInlineCallbacks() + def is_support_user(self, user_id): + """Determines if the user is of type UserTypes.SUPPORT + + Args: + user_id (str): user id to test + + Returns: + Deferred[bool]: True if user is of type UserTypes.SUPPORT + """ + res = yield self.runInteraction( + "is_support_user", self.is_support_user_txn, user_id + ) + defer.returnValue(res) + + def is_support_user_txn(self, txn, user_id): + res = 
self._simple_select_one_onecol_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + retcol="user_type", + allow_none=True, + ) + return True if res == UserTypes.SUPPORT else False + + def get_users_by_id_case_insensitive(self, user_id): + """Gets users that match user_id case insensitively. + Returns a mapping of user_id -> password_hash. + """ + def f(txn): + sql = ( + "SELECT name, password_hash FROM users" + " WHERE lower(name) = lower(?)" + ) + txn.execute(sql, (user_id,)) + return dict(txn) + + return self.runInteraction("get_users_by_id_case_insensitive", f) + + @defer.inlineCallbacks + def count_all_users(self): + """Counts all users registered on the homeserver.""" + def _count_users(txn): + txn.execute("SELECT COUNT(*) AS users FROM users") + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) + + def count_daily_user_type(self): + """ + Counts 1) native non guest users + 2) native guests users + 3) bridged users + who registered on the homeserver in the past 24 hours + """ + def _count_daily_user_type(txn): + yesterday = int(self._clock.time()) - (60 * 60 * 24) + + sql = """ + SELECT user_type, COALESCE(count(*), 0) AS count FROM ( + SELECT + CASE + WHEN is_guest=0 AND appservice_id IS NULL THEN 'native' + WHEN is_guest=1 AND appservice_id IS NULL THEN 'guest' + WHEN is_guest=0 AND appservice_id IS NOT NULL THEN 'bridged' + END AS user_type + FROM users + WHERE creation_ts > ? 
+ ) AS t GROUP BY user_type + """ + results = {'native': 0, 'guest': 0, 'bridged': 0} + txn.execute(sql, (yesterday,)) + for row in txn: + results[row[0]] = row[1] + return results + return self.runInteraction("count_daily_user_type", _count_daily_user_type) + + @defer.inlineCallbacks + def count_nonbridged_users(self): + def _count_users(txn): + txn.execute(""" + SELECT COALESCE(COUNT(*), 0) FROM users + WHERE appservice_id IS NULL + """) + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) + + @defer.inlineCallbacks + def find_next_generated_user_id_localpart(self): + """ + Gets the localpart of the next generated user ID. + + Generated user IDs are integers, and we aim for them to be as small as + we can. Unfortunately, it's possible some of them are already taken by + existing users, and there may be gaps in the already taken range. This + function returns the start of the first allocatable gap. This is to + avoid the case of ID 10000000 being pre-allocated, so us wasting the + first (and shortest) many generated user IDs. 
+ """ + def _find_next_generated_user_id(txn): + txn.execute("SELECT name FROM users") + + regex = re.compile(r"^@(\d+):") + + found = set() + + for user_id, in txn: + match = regex.search(user_id) + if match: + found.add(int(match.group(1))) + for i in range(len(found) + 1): + if i not in found: + return i + + defer.returnValue((yield self.runInteraction( + "find_next_generated_user_id", + _find_next_generated_user_id + ))) + + @defer.inlineCallbacks + def get_3pid_guest_access_token(self, medium, address): + ret = yield self._simple_select_one( + "threepid_guest_access_tokens", + { + "medium": medium, + "address": address + }, + ["guest_access_token"], True, 'get_3pid_guest_access_token' + ) + if ret: + defer.returnValue(ret["guest_access_token"]) + defer.returnValue(None) + + @defer.inlineCallbacks + def get_user_id_by_threepid(self, medium, address): + """Returns user id from threepid + + Args: + medium (str): threepid medium e.g. email + address (str): threepid address e.g. me@example.com + + Returns: + Deferred[str|None]: user id or None if no user id/threepid mapping exists + """ + user_id = yield self.runInteraction( + "get_user_id_by_threepid", self.get_user_id_by_threepid_txn, + medium, address + ) + defer.returnValue(user_id) + + def get_user_id_by_threepid_txn(self, txn, medium, address): + """Returns user id from threepid + + Args: + txn (cursor): + medium (str): threepid medium e.g. email + address (str): threepid address e.g. 
me@example.com + + Returns: + str|None: user id or None if no user id/threepid mapping exists + """ + ret = self._simple_select_one_txn( + txn, + "user_threepids", + { + "medium": medium, + "address": address + }, + ['user_id'], True + ) + if ret: + return ret['user_id'] + return None + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -167,7 +350,7 @@ class RegistrationStore(RegistrationWorkerStore, def register(self, user_id, token=None, password_hash=None, was_guest=False, make_guest=False, appservice_id=None, - create_profile_with_localpart=None, admin=False): + create_profile_with_displayname=None, admin=False, user_type=None): """Attempts to register an account. Args: @@ -181,8 +364,12 @@ class RegistrationStore(RegistrationWorkerStore, make_guest (boolean): True if the the new user should be guest, false to add a regular user account. appservice_id (str): The ID of the appservice registering the user. - create_profile_with_localpart (str): Optionally create a profile for - the given localpart. + create_profile_with_displayname (unicode): Optionally create a profile for + the user, setting their displayname to the given value + admin (boolean): is an admin user? + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. + Raises: StoreError if the user_id could not be registered. 
""" @@ -195,8 +382,9 @@ class RegistrationStore(RegistrationWorkerStore, was_guest, make_guest, appservice_id, - create_profile_with_localpart, - admin + create_profile_with_displayname, + admin, + user_type ) def _register( @@ -208,9 +396,12 @@ class RegistrationStore(RegistrationWorkerStore, was_guest, make_guest, appservice_id, - create_profile_with_localpart, + create_profile_with_displayname, admin, + user_type, ): + user_id_obj = UserID.from_string(user_id) + now = int(self.clock.time()) next_id = self._access_tokens_id_gen.get_next() @@ -244,6 +435,7 @@ class RegistrationStore(RegistrationWorkerStore, "is_guest": 1 if make_guest else 0, "appservice_id": appservice_id, "admin": 1 if admin else 0, + "user_type": user_type, } ) else: @@ -257,6 +449,7 @@ class RegistrationStore(RegistrationWorkerStore, "is_guest": 1 if make_guest else 0, "appservice_id": appservice_id, "admin": 1 if admin else 0, + "user_type": user_type, } ) except self.database_engine.module.IntegrityError: @@ -273,12 +466,15 @@ class RegistrationStore(RegistrationWorkerStore, (next_id, user_id, token,) ) - if create_profile_with_localpart: + if create_profile_with_displayname: # set a default displayname serverside to avoid ugly race # between auto-joins and clients trying to set displaynames + # + # *obviously* the 'profiles' table uses localpart for user_id + # while everything else uses the full mxid. txn.execute( "INSERT INTO profiles(user_id, displayname) VALUES (?,?)", - (create_profile_with_localpart, create_profile_with_localpart) + (user_id_obj.localpart, create_profile_with_displayname) ) self._invalidate_cache_and_stream( @@ -286,20 +482,6 @@ class RegistrationStore(RegistrationWorkerStore, ) txn.call_after(self.is_guest.invalidate, (user_id,)) - def get_users_by_id_case_insensitive(self, user_id): - """Gets users that match user_id case insensitively. - Returns a mapping of user_id -> password_hash. 
- """ - def f(txn): - sql = ( - "SELECT name, password_hash FROM users" - " WHERE lower(name) = lower(?)" - ) - txn.execute(sql, (user_id,)) - return dict(txn) - - return self.runInteraction("get_users_by_id_case_insensitive", f) - def user_set_password_hash(self, user_id, password_hash): """ NB. This does *not* evict any cache because the one use for this @@ -472,47 +654,6 @@ class RegistrationStore(RegistrationWorkerStore, ) defer.returnValue(ret) - @defer.inlineCallbacks - def get_user_id_by_threepid(self, medium, address): - """Returns user id from threepid - - Args: - medium (str): threepid medium e.g. email - address (str): threepid address e.g. me@example.com - - Returns: - Deferred[str|None]: user id or None if no user id/threepid mapping exists - """ - user_id = yield self.runInteraction( - "get_user_id_by_threepid", self.get_user_id_by_threepid_txn, - medium, address - ) - defer.returnValue(user_id) - - def get_user_id_by_threepid_txn(self, txn, medium, address): - """Returns user id from threepid - - Args: - txn (cursor): - medium (str): threepid medium e.g. email - address (str): threepid address e.g. 
me@example.com - - Returns: - str|None: user id or None if no user id/threepid mapping exists - """ - ret = self._simple_select_one_txn( - txn, - "user_threepids", - { - "medium": medium, - "address": address - }, - ['user_id'], True - ) - if ret: - return ret['user_id'] - return None - def user_delete_threepid(self, user_id, medium, address): return self._simple_delete( "user_threepids", @@ -525,107 +666,6 @@ class RegistrationStore(RegistrationWorkerStore, ) @defer.inlineCallbacks - def count_all_users(self): - """Counts all users registered on the homeserver.""" - def _count_users(txn): - txn.execute("SELECT COUNT(*) AS users FROM users") - rows = self.cursor_to_dict(txn) - if rows: - return rows[0]["users"] - return 0 - - ret = yield self.runInteraction("count_users", _count_users) - defer.returnValue(ret) - - def count_daily_user_type(self): - """ - Counts 1) native non guest users - 2) native guests users - 3) bridged users - who registered on the homeserver in the past 24 hours - """ - def _count_daily_user_type(txn): - yesterday = int(self._clock.time()) - (60 * 60 * 24) - - sql = """ - SELECT user_type, COALESCE(count(*), 0) AS count FROM ( - SELECT - CASE - WHEN is_guest=0 AND appservice_id IS NULL THEN 'native' - WHEN is_guest=1 AND appservice_id IS NULL THEN 'guest' - WHEN is_guest=0 AND appservice_id IS NOT NULL THEN 'bridged' - END AS user_type - FROM users - WHERE creation_ts > ? 
- ) AS t GROUP BY user_type - """ - results = {'native': 0, 'guest': 0, 'bridged': 0} - txn.execute(sql, (yesterday,)) - for row in txn: - results[row[0]] = row[1] - return results - return self.runInteraction("count_daily_user_type", _count_daily_user_type) - - @defer.inlineCallbacks - def count_nonbridged_users(self): - def _count_users(txn): - txn.execute(""" - SELECT COALESCE(COUNT(*), 0) FROM users - WHERE appservice_id IS NULL - """) - count, = txn.fetchone() - return count - - ret = yield self.runInteraction("count_users", _count_users) - defer.returnValue(ret) - - @defer.inlineCallbacks - def find_next_generated_user_id_localpart(self): - """ - Gets the localpart of the next generated user ID. - - Generated user IDs are integers, and we aim for them to be as small as - we can. Unfortunately, it's possible some of them are already taken by - existing users, and there may be gaps in the already taken range. This - function returns the start of the first allocatable gap. This is to - avoid the case of ID 10000000 being pre-allocated, so us wasting the - first (and shortest) many generated user IDs. 
- """ - def _find_next_generated_user_id(txn): - txn.execute("SELECT name FROM users") - - regex = re.compile(r"^@(\d+):") - - found = set() - - for user_id, in txn: - match = regex.search(user_id) - if match: - found.add(int(match.group(1))) - for i in range(len(found) + 1): - if i not in found: - return i - - defer.returnValue((yield self.runInteraction( - "find_next_generated_user_id", - _find_next_generated_user_id - ))) - - @defer.inlineCallbacks - def get_3pid_guest_access_token(self, medium, address): - ret = yield self._simple_select_one( - "threepid_guest_access_tokens", - { - "medium": medium, - "address": address - }, - ["guest_access_token"], True, 'get_3pid_guest_access_token' - ) - if ret: - defer.returnValue(ret["guest_access_token"]) - defer.returnValue(None) - - @defer.inlineCallbacks def save_or_get_3pid_guest_access_token( self, medium, address, access_token, inviter_user_id ): diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 61013b8919..41c65e112a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -47,7 +47,7 @@ class RoomWorkerStore(SQLBaseStore): Args: room_id (str): The ID of the room to retrieve. Returns: - A namedtuple containing the room information, or an empty list. + A dict containing the room information, or None if the room is unknown. """ return self._simple_select_one( table="rooms", diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 0707f9a86a..592c1bcd33 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -588,12 +588,12 @@ class RoomMemberStore(RoomMemberWorkerStore): ) # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. - # The only current event that can also be an outlier is if its an - # invite that has come in across federation. + # i.e., its something that has just happened. 
If the event is an + # outlier it is only current if its an "out of band membership", + # like a remote invite or a rejection of a remote invite. is_new_state = not backfilled and ( not event.internal_metadata.is_outlier() - or event.internal_metadata.is_invite_from_remote() + or event.internal_metadata.is_out_of_band_membership() ) is_mine = self.hs.is_mine_id(event.state_key) if is_new_state and is_mine: diff --git a/synapse/storage/schema/delta/40/device_list_streams.sql b/synapse/storage/schema/delta/40/device_list_streams.sql index 54841b3843..dd6dcb65f1 100644 --- a/synapse/storage/schema/delta/40/device_list_streams.sql +++ b/synapse/storage/schema/delta/40/device_list_streams.sql @@ -20,9 +20,6 @@ CREATE TABLE device_lists_remote_cache ( content TEXT NOT NULL ); -CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id); - - -- The last update we got for a user. Empty if we're not receiving updates for -- that user. CREATE TABLE device_lists_remote_extremeties ( @@ -30,7 +27,11 @@ CREATE TABLE device_lists_remote_extremeties ( stream_id TEXT NOT NULL ); -CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id); +-- we used to create non-unique indexes on these tables, but as of update 52 we create +-- unique indexes concurrently: +-- +-- CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id); +-- CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id); -- Stream of device lists updates. 
Includes both local and remotes diff --git a/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql new file mode 100644 index 0000000000..91e03d13e1 --- /dev/null +++ b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- This is needed to efficiently check for unreferenced state groups during +-- purge. Added events_to_state_group(state_group) index +INSERT into background_updates (update_name, progress_json) + VALUES ('event_to_state_groups_sg_index', '{}'); diff --git a/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql b/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql new file mode 100644 index 0000000000..bfa49e6f92 --- /dev/null +++ b/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql @@ -0,0 +1,36 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- register a background update which will create a unique index on +-- device_lists_remote_cache +INSERT into background_updates (update_name, progress_json) + VALUES ('device_lists_remote_cache_unique_idx', '{}'); + +-- and one on device_lists_remote_extremeties +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ( + 'device_lists_remote_extremeties_unique_idx', '{}', + + -- doesn't really depend on this, but we need to make sure both happen + -- before we drop the old indexes. + 'device_lists_remote_cache_unique_idx' + ); + +-- once they complete, we can drop the old indexes. +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ( + 'drop_device_list_streams_non_unique_indexes', '{}', + 'device_lists_remote_extremeties_unique_idx' + ); diff --git a/synapse/storage/schema/delta/52/e2e_room_keys.sql b/synapse/storage/schema/delta/52/e2e_room_keys.sql new file mode 100644 index 0000000000..db687cccae --- /dev/null +++ b/synapse/storage/schema/delta/52/e2e_room_keys.sql @@ -0,0 +1,53 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Change version column to an integer so we can do MAX() sensibly + */ +CREATE TABLE e2e_room_keys_versions_new ( + user_id TEXT NOT NULL, + version BIGINT NOT NULL, + algorithm TEXT NOT NULL, + auth_data TEXT NOT NULL, + deleted SMALLINT DEFAULT 0 NOT NULL +); + +INSERT INTO e2e_room_keys_versions_new + SELECT user_id, CAST(version as BIGINT), algorithm, auth_data, deleted FROM e2e_room_keys_versions; + +DROP TABLE e2e_room_keys_versions; +ALTER TABLE e2e_room_keys_versions_new RENAME TO e2e_room_keys_versions; + +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version); + +/* Change e2e_rooms_keys to match + */ +CREATE TABLE e2e_room_keys_new ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + session_id TEXT NOT NULL, + version BIGINT NOT NULL, + first_message_index INT, + forwarded_count INT, + is_verified BOOLEAN, + session_data TEXT NOT NULL +); + +INSERT INTO e2e_room_keys_new + SELECT user_id, room_id, session_id, CAST(version as BIGINT), first_message_index, forwarded_count, is_verified, session_data FROM e2e_room_keys; + +DROP TABLE e2e_room_keys; +ALTER TABLE e2e_room_keys_new RENAME TO e2e_room_keys; + +CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id); diff --git a/synapse/storage/schema/delta/53/add_user_type_to_users.sql b/synapse/storage/schema/delta/53/add_user_type_to_users.sql new file mode 100644 index 0000000000..88ec2f83e5 --- /dev/null +++ b/synapse/storage/schema/delta/53/add_user_type_to_users.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* The type of the user: NULL for a regular user, or one of the constants in + * synapse.api.constants.UserTypes + */ +ALTER TABLE users ADD COLUMN user_type TEXT DEFAULT NULL; diff --git a/synapse/storage/schema/delta/11/v11.sql b/synapse/storage/schema/delta/53/drop_sent_transactions.sql index e7b4f90127..e372f5a44a 100644 --- a/synapse/storage/schema/delta/11/v11.sql +++ b/synapse/storage/schema/delta/53/drop_sent_transactions.sql @@ -1,4 +1,4 @@ -/* Copyright 2015, 2016 OpenMarket Ltd +/* Copyright 2018 New Vector Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,4 +13,4 @@ * limitations under the License. */ -CREATE INDEX IF NOT EXISTS sent_transaction_txn_id ON sent_transactions(transaction_id); \ No newline at end of file +DROP TABLE IF EXISTS sent_transactions; diff --git a/synapse/storage/schema/delta/53/event_format_version.sql b/synapse/storage/schema/delta/53/event_format_version.sql new file mode 100644 index 0000000000..1d977c2834 --- /dev/null +++ b/synapse/storage/schema/delta/53/event_format_version.sql @@ -0,0 +1,16 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE event_json ADD COLUMN format_version INTEGER; diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql new file mode 100644 index 0000000000..b812c5794f --- /dev/null +++ b/synapse/storage/schema/delta/53/user_ips_index.sql @@ -0,0 +1,30 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + -- analyze user_ips, to help ensure the correct indices are used +INSERT INTO background_updates (update_name, progress_json) VALUES + ('user_ips_analyze', '{}'); + +-- delete duplicates +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_remove_dupes', '{}', 'user_ips_analyze'); + +-- add a new unique index to user_ips table +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_device_unique_index', '{}', 'user_ips_remove_dupes'); + +-- drop the old original index +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); diff --git a/synapse/storage/schema/full_schemas/11/transactions.sql b/synapse/storage/schema/full_schemas/11/transactions.sql index a3f4a0a790..f6a058832e 100644 --- a/synapse/storage/schema/full_schemas/11/transactions.sql +++ b/synapse/storage/schema/full_schemas/11/transactions.sql @@ -25,25 +25,6 @@ CREATE TABLE IF NOT EXISTS received_transactions( CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0; - --- Stores what transactions we've sent, what their response was (if we got one) and whether we have --- since referenced the transaction in another outgoing transaction -CREATE TABLE IF NOT EXISTS sent_transactions( - id INTEGER PRIMARY KEY AUTOINCREMENT, -- This is used to apply insertion ordering - transaction_id TEXT, - destination TEXT, - response_code INTEGER DEFAULT 0, - response_json TEXT, - ts BIGINT -); - -CREATE INDEX sent_transaction_dest ON sent_transactions(destination); -CREATE INDEX sent_transaction_txn_id ON sent_transactions(transaction_id); --- So that we can do an efficient look up of all transactions that have yet to be successfully --- sent. -CREATE INDEX sent_transaction_sent ON sent_transactions(response_code); - - -- For sent transactions only. 
CREATE TABLE IF NOT EXISTS transaction_id_to_pdu( transaction_id INTEGER, diff --git a/synapse/storage/schema/full_schemas/16/transactions.sql b/synapse/storage/schema/full_schemas/16/transactions.sql index 14b67cce25..17e67bedac 100644 --- a/synapse/storage/schema/full_schemas/16/transactions.sql +++ b/synapse/storage/schema/full_schemas/16/transactions.sql @@ -25,25 +25,6 @@ CREATE TABLE IF NOT EXISTS received_transactions( CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0; - --- Stores what transactions we've sent, what their response was (if we got one) and whether we have --- since referenced the transaction in another outgoing transaction -CREATE TABLE IF NOT EXISTS sent_transactions( - id BIGINT PRIMARY KEY, -- This is used to apply insertion ordering - transaction_id TEXT, - destination TEXT, - response_code INTEGER DEFAULT 0, - response_json TEXT, - ts BIGINT -); - -CREATE INDEX sent_transaction_dest ON sent_transactions(destination); -CREATE INDEX sent_transaction_txn_id ON sent_transactions(transaction_id); --- So that we can do an efficient look up of all transactions that have yet to be successfully --- sent. -CREATE INDEX sent_transaction_sent ON sent_transactions(response_code); - - -- For sent transactions only. 
CREATE TABLE IF NOT EXISTS transaction_id_to_pdu( transaction_id INTEGER, diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d5b5df93e6..c6420b2374 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -45,6 +45,10 @@ class SearchStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) + + if not hs.config.enable_search: + return + self.register_background_update_handler( self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search ) @@ -316,6 +320,8 @@ class SearchStore(BackgroundUpdateStore): entries (iterable[SearchEntry]): entries to be added to the table """ + if not self.hs.config.enable_search: + return if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ef65929bb2..6ddc4055d2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -428,14 +428,54 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): """ # for now we do this by looking at the create event. We may want to cache this # more intelligently in future. + + # Retrieve the room's create event + create_event = yield self.get_create_event_for_room(room_id) + defer.returnValue(create_event.content.get("room_version", "1")) + + @defer.inlineCallbacks + def get_room_predecessor(self, room_id): + """Get the predecessor room of an upgraded room if one exists. + Otherwise return None. + + Args: + room_id (str) + + Returns: + Deferred[unicode|None]: predecessor room id + + Raises: + NotFoundError if the room is unknown + """ + # Retrieve the room's create event + create_event = yield self.get_create_event_for_room(room_id) + + # Return predecessor if present + defer.returnValue(create_event.content.get("predecessor", None)) + + @defer.inlineCallbacks + def get_create_event_for_room(self, room_id): + """Get the create state event for a room. 
+ + Args: + room_id (str) + + Returns: + Deferred[EventBase]: The room creation event. + + Raises: + NotFoundError if the room is unknown + """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) + # If we can't find the create event, assume we've hit a dead end if not create_id: - raise NotFoundError("Unknown room") + raise NotFoundError("Unknown room %s" % (room_id)) + # Retrieve the room's create event and return create_event = yield self.get_event(create_id) - defer.returnValue(create_event.content.get("room_version", "1")) + defer.returnValue(create_event) @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): @@ -508,6 +548,31 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): _get_filtered_current_state_ids_txn, ) + @defer.inlineCallbacks + def get_canonical_alias_for_room(self, room_id): + """Get canonical alias for room, if any + + Args: + room_id (str) + + Returns: + Deferred[str|None]: The canonical alias, if any + """ + + state = yield self.get_filtered_current_state_ids(room_id, StateFilter.from_types( + [(EventTypes.CanonicalAlias, "")] + )) + + event_id = state.get((EventTypes.CanonicalAlias, "")) + if not event_id: + return + + event = yield self.get_event(event_id, allow_none=True) + if not event: + return + + defer.returnValue(event.content.get("canonical_alias")) + @cached(max_entries=10000, iterable=True) def get_state_group_delta(self, state_group): """Given a state group try to return a previous group and a delta between @@ -1257,6 +1322,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" + EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" def __init__(self, db_conn, hs): super(StateStore, self).__init__(db_conn, 
hs) @@ -1275,6 +1341,12 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): columns=["state_key"], where_clause="type='m.room.member'", ) + self.register_background_index_update( + self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME, + index_name="event_to_state_groups_sg_index", + table="event_to_state_groups", + columns=["state_group"], + ) def _store_event_state_mappings_txn(self, txn, events_and_contexts): state_groups = {} diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index a8781b0e5d..fea866c043 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -22,6 +22,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -31,12 +32,19 @@ logger = logging.getLogger(__name__) class UserDirectoryStore(SQLBaseStore): - @cachedInlineCallbacks(cache_context=True) - def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): + @defer.inlineCallbacks + def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable """ - current_state_ids = yield self.get_current_state_ids( - room_id, on_invalidate=cache_context.invalidate + + # Create a state filter that only queries join and history state event + types_to_filter = ( + (EventTypes.JoinRules, ""), + (EventTypes.RoomHistoryVisibility, ""), + ) + + current_state_ids = yield self.get_filtered_current_state_ids( + room_id, StateFilter.from_types(types_to_filter) ) join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) @@ -66,14 +74,8 @@ class UserDirectoryStore(SQLBaseStore): """ yield self._simple_insert_many( table="users_in_public_rooms", - values=[ - { - 
"user_id": user_id, - "room_id": room_id, - } - for user_id in user_ids - ], - desc="add_users_to_public_room" + values=[{"user_id": user_id, "room_id": room_id} for user_id in user_ids], + desc="add_users_to_public_room", ) for user_id in user_ids: self.get_user_in_public_room.invalidate((user_id,)) @@ -99,7 +101,9 @@ class UserDirectoryStore(SQLBaseStore): """ args = ( ( - user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id), + user_id, + get_localpart_from_id(user_id), + get_domain_from_id(user_id), profile.display_name, ) for user_id, profile in iteritems(users_with_profile) @@ -112,7 +116,7 @@ class UserDirectoryStore(SQLBaseStore): args = ( ( user_id, - "%s %s" % (user_id, p.display_name,) if p.display_name else user_id + "%s %s" % (user_id, p.display_name) if p.display_name else user_id, ) for user_id, p in iteritems(users_with_profile) ) @@ -133,12 +137,10 @@ class UserDirectoryStore(SQLBaseStore): "avatar_url": profile.avatar_url, } for user_id, profile in iteritems(users_with_profile) - ] + ], ) for user_id in users_with_profile: - txn.call_after( - self.get_user_in_directory.invalidate, (user_id,) - ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) return self.runInteraction( "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn @@ -168,39 +170,69 @@ class UserDirectoryStore(SQLBaseStore): if isinstance(self.database_engine, PostgresEngine): # We weight the localpart most highly, then display name and finally # server name - if new_entry: + if self.database_engine.can_native_upsert: sql = """ INSERT INTO user_directory_search(user_id, vector) VALUES (?, setweight(to_tsvector('english', ?), 'A') || setweight(to_tsvector('english', ?), 'D') || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - ) + ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector """ txn.execute( sql, ( - user_id, get_localpart_from_id(user_id), - get_domain_from_id(user_id), display_name, - ) + user_id, + 
get_localpart_from_id(user_id), + get_domain_from_id(user_id), + display_name, + ), ) else: - sql = """ - UPDATE user_directory_search - SET vector = setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - WHERE user_id = ? - """ - txn.execute( - sql, - ( - get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, user_id, + # TODO: Remove this code after we've bumped the minimum version + # of postgres to always support upserts, so we can get rid of + # `new_entry` usage + if new_entry is True: + sql = """ + INSERT INTO user_directory_search(user_id, vector) + VALUES (?, + setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + ) + """ + txn.execute( + sql, + ( + user_id, + get_localpart_from_id(user_id), + get_domain_from_id(user_id), + display_name, + ), + ) + elif new_entry is False: + sql = """ + UPDATE user_directory_search + SET vector = setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + WHERE user_id = ? 
+ """ + txn.execute( + sql, + ( + get_localpart_from_id(user_id), + get_domain_from_id(user_id), + display_name, + user_id, + ), + ) + else: + raise RuntimeError( + "upsert returned None when 'can_native_upsert' is False" ) - ) elif isinstance(self.database_engine, Sqlite3Engine): - value = "%s %s" % (user_id, display_name,) if display_name else user_id + value = "%s %s" % (user_id, display_name) if display_name else user_id self._simple_upsert_txn( txn, table="user_directory_search", @@ -231,29 +263,18 @@ class UserDirectoryStore(SQLBaseStore): def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( - txn, - table="user_directory", - keyvalues={"user_id": user_id}, + txn, table="user_directory", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, - table="user_directory_search", - keyvalues={"user_id": user_id}, + txn, table="user_directory_search", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - ) - txn.call_after( - self.get_user_in_directory.invalidate, (user_id,) + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) - txn.call_after( - self.get_user_in_public_room.invalidate, (user_id,) - ) - return self.runInteraction( - "remove_from_user_dir", _remove_from_user_dir_txn, - ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) + txn.call_after(self.get_user_in_public_room.invalidate, (user_id,)) + + return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) @defer.inlineCallbacks def remove_from_user_in_public_room(self, user_id): @@ -338,6 +359,7 @@ class UserDirectoryStore(SQLBaseStore): share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. 
""" + def _add_users_who_share_room_txn(txn): self._simple_insert_many_txn( txn, @@ -354,13 +376,12 @@ class UserDirectoryStore(SQLBaseStore): ) for user_id, other_user_id in user_id_tuples: txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, - (user_id,), + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) txn.call_after( - self.get_if_users_share_a_room.invalidate, - (user_id, other_user_id), + self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) ) + return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) @@ -374,6 +395,7 @@ class UserDirectoryStore(SQLBaseStore): share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ + def _update_users_who_share_room_txn(txn): sql = """ UPDATE users_who_share_rooms @@ -381,21 +403,16 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND other_user_id = ? """ txn.executemany( - sql, - ( - (room_id, share_private, uid, oid) - for uid, oid in user_id_sets - ) + sql, ((room_id, share_private, uid, oid) for uid, oid in user_id_sets) ) for user_id, other_user_id in user_id_sets: txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, - (user_id,), + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) txn.call_after( - self.get_if_users_share_a_room.invalidate, - (user_id, other_user_id), + self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) ) + return self.runInteraction( "update_users_who_share_room", _update_users_who_share_room_txn ) @@ -409,22 +426,18 @@ class UserDirectoryStore(SQLBaseStore): share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. 
""" + def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, table="users_who_share_rooms", - keyvalues={ - "user_id": user_id, - "other_user_id": other_user_id, - }, + keyvalues={"user_id": user_id, "other_user_id": other_user_id}, ) txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, - (user_id,), + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) txn.call_after( - self.get_if_users_share_a_room.invalidate, - (user_id, other_user_id), + self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) ) return self.runInteraction( @@ -445,10 +458,7 @@ class UserDirectoryStore(SQLBaseStore): """ return self._simple_select_one_onecol( table="users_who_share_rooms", - keyvalues={ - "user_id": user_id, - "other_user_id": other_user_id, - }, + keyvalues={"user_id": user_id, "other_user_id": other_user_id}, retcol="share_private", allow_none=True, desc="get_if_users_share_a_room", @@ -466,17 +476,12 @@ class UserDirectoryStore(SQLBaseStore): """ rows = yield self._simple_select_list( table="users_who_share_rooms", - keyvalues={ - "user_id": user_id, - }, - retcols=("other_user_id", "share_private",), + keyvalues={"user_id": user_id}, + retcols=("other_user_id", "share_private"), desc="get_users_who_share_room_with_user", ) - defer.returnValue({ - row["other_user_id"]: row["share_private"] - for row in rows - }) + defer.returnValue({row["other_user_id"]: row["share_private"] for row in rows}) def get_users_in_share_dir_with_room_id(self, user_id, room_id): """Get all user tuples that are in the users_who_share_rooms due to the @@ -523,6 +528,7 @@ class UserDirectoryStore(SQLBaseStore): def delete_all_from_user_dir(self): """Delete the entire user directory """ + def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") @@ -532,6 +538,7 @@ class UserDirectoryStore(SQLBaseStore): txn.call_after(self.get_user_in_public_room.invalidate_all) 
txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) txn.call_after(self.get_if_users_share_a_room.invalidate_all) + return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn ) @@ -541,7 +548,7 @@ class UserDirectoryStore(SQLBaseStore): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, - retcols=("room_id", "display_name", "avatar_url",), + retcols=("room_id", "display_name", "avatar_url"), allow_none=True, desc="get_user_in_directory", ) @@ -574,7 +581,9 @@ class UserDirectoryStore(SQLBaseStore): def get_current_state_deltas(self, prev_stream_id): prev_stream_id = int(prev_stream_id) - if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): + if not self._curr_state_delta_stream_cache.has_any_entity_changed( + prev_stream_id + ): return [] def get_current_state_deltas_txn(txn): @@ -608,7 +617,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC """ - txn.execute(sql, (prev_stream_id, max_stream_id,)) + txn.execute(sql, (prev_stream_id, max_stream_id)) return self.cursor_to_dict(txn) return self.runInteraction( @@ -698,8 +707,11 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % (join_clause, where_clause) - args = join_args + (full_query, exact_query, prefix_query, limit + 1,) + """ % ( + join_clause, + where_clause, + ) + args = join_args + (full_query, exact_query, prefix_query, limit + 1) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -716,7 +728,10 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % (join_clause, where_clause) + """ % ( + join_clause, + where_clause, + ) args = join_args + (search_query, limit + 1) else: # This should be unreachable. 
@@ -728,10 +743,7 @@ class UserDirectoryStore(SQLBaseStore): limited = len(results) > limit - defer.returnValue({ - "limited": limited, - "results": results, - }) + defer.returnValue({"limited": limited, "results": results}) def _parse_query_sqlite(search_term): @@ -746,7 +758,7 @@ def _parse_query_sqlite(search_term): # Pull out the individual words, discarding any non-word characters. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) - return " & ".join("(%s* OR %s)" % (result, result,) for result in results) + return " & ".join("(%s* OR %s)" % (result, result) for result in results) def _parse_query_postgres(search_term): @@ -759,7 +771,7 @@ def _parse_query_postgres(search_term): # Pull out the individual words, discarding any non-word characters. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) - both = " & ".join("(%s:* | %s)" % (result, result,) for result in results) + both = " & ".join("(%s:* | %s)" % (result, result) for result in results) exact = " & ".join("%s" % (result,) for result in results) prefix = " & ".join("%s:*" % (result,) for result in results) diff --git a/synapse/types.py b/synapse/types.py index 41afb27a74..d8cb64addb 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re import string from collections import namedtuple @@ -228,6 +229,71 @@ def contains_invalid_mxid_characters(localpart): return any(c not in mxid_localpart_allowed_characters for c in localpart) +UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") + +# the following is a pattern which matches '=', and bytes which are not allowed in a mxid +# localpart. 
+# +# It works by: +# * building a string containing the allowed characters (excluding '=') +# * escaping every special character with a backslash (to stop '-' being interpreted as a +# range operator) +# * wrapping it in a '[^...]' regex +# * converting the whole lot to a 'bytes' sequence, so that we can use it to match +# bytes rather than strings +# +NON_MXID_CHARACTER_PATTERN = re.compile( + ("[^%s]" % ( + re.escape("".join(mxid_localpart_allowed_characters - {"="}),), + )).encode("ascii"), +) + + +def map_username_to_mxid_localpart(username, case_sensitive=False): + """Map a username onto a string suitable for a MXID + + This follows the algorithm laid out at + https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets. + + Args: + username (unicode|bytes): username to be mapped + case_sensitive (bool): true if TEST and test should be mapped + onto different mxids + + Returns: + unicode: string suitable for a mxid localpart + """ + if not isinstance(username, bytes): + username = username.encode('utf-8') + + # first we sort out upper-case characters + if case_sensitive: + def f1(m): + return b"_" + m.group().lower() + + username = UPPER_CASE_PATTERN.sub(f1, username) + else: + username = username.lower() + + # then we sort out non-ascii characters + def f2(m): + g = m.group()[0] + if isinstance(g, str): + # on python 2, we need to do a ord(). On python 3, the + # byte itself will do. + g = ord(g) + return b"=%02x" % (g,) + + username = NON_MXID_CHARACTER_PATTERN.sub(f2, username) + + # we also do the =-escaping to mxids starting with an underscore. + username = re.sub(b'^_', b'=5f', username) + + # we should now only have ascii bytes left, so can decode back to a + # unicode. 
+ return username.decode('ascii') + + class StreamToken( namedtuple("Token", ( "room_key", diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index ec7b2c9672..f0e4a0e10c 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -201,7 +201,7 @@ class Linearizer(object): if entry[0] >= self.max_count: res = self._await_lock(key) else: - logger.info( + logger.debug( "Acquired uncontended linearizer lock %r for key %r", self.name, key, ) entry[0] += 1 @@ -215,7 +215,7 @@ class Linearizer(object): try: yield finally: - logger.info("Releasing linearizer lock %r for key %r", self.name, key) + logger.debug("Releasing linearizer lock %r for key %r", self.name, key) # We've finished executing so check if there are any things # blocked waiting to execute and start one of them @@ -247,7 +247,7 @@ class Linearizer(object): """ entry = self.key_to_defer[key] - logger.info( + logger.debug( "Waiting to acquire linearizer lock %r for key %r", self.name, key, ) @@ -255,7 +255,7 @@ class Linearizer(object): entry[1][new_defer] = 1 def cb(_r): - logger.info("Acquired linearizer lock %r for key %r", self.name, key) + logger.debug("Acquired linearizer lock %r for key %r", self.name, key) entry[0] += 1 # if the code holding the lock completes synchronously, then it @@ -273,7 +273,7 @@ class Linearizer(object): def eb(e): logger.info("defer %r got err %r", new_defer, e) if isinstance(e, CancelledError): - logger.info( + logger.debug( "Cancelling wait for linearizer lock %r for key %r", self.name, key, ) @@ -387,12 +387,14 @@ def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None): deferred that wraps and times out the given deferred, correctly handling the case where the given deferred's canceller throws. 
+ (See https://twistedmatrix.com/trac/ticket/9534) + NOTE: Unlike `Deferred.addTimeout`, this function returns a new deferred Args: deferred (Deferred) timeout (float): Timeout in seconds - reactor (twisted.internet.reactor): The twisted reactor to use + reactor (twisted.interfaces.IReactorTime): The twisted reactor to use on_timeout_cancel (callable): A callable which is called immediately after the deferred times out, and not if this deferred is otherwise cancelled before the timeout. diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py new file mode 100644 index 0000000000..5ba1862506 --- /dev/null +++ b/synapse/util/caches/ttlcache.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +# Copyright 2015, 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import time + +import attr +from sortedcontainers import SortedList + +from synapse.util.caches import register_cache + +logger = logging.getLogger(__name__) + +SENTINEL = object() + + +class TTLCache(object): + """A key/value cache implementation where each entry has its own TTL""" + + def __init__(self, cache_name, timer=time.time): + # map from key to _CacheEntry + self._data = {} + + # the _CacheEntries, sorted by expiry time + self._expiry_list = SortedList() + + self._timer = timer + + self._metrics = register_cache("ttl", cache_name, self) + + def set(self, key, value, ttl): + """Add/update an entry in the cache + + Args: + key: key for this entry + value: value for this entry + ttl (float): TTL for this entry, in seconds + """ + expiry = self._timer() + ttl + + self.expire() + e = self._data.pop(key, SENTINEL) + if e != SENTINEL: + self._expiry_list.remove(e) + + entry = _CacheEntry(expiry_time=expiry, key=key, value=value) + self._data[key] = entry + self._expiry_list.add(entry) + + def get(self, key, default=SENTINEL): + """Get a value from the cache + + Args: + key: key to look up + default: default value to return, if key is not found. 
If not set, and the + key is not found, a KeyError will be raised + + Returns: + value from the cache, or the default + """ + self.expire() + e = self._data.get(key, SENTINEL) + if e == SENTINEL: + self._metrics.inc_misses() + if default == SENTINEL: + raise KeyError(key) + return default + self._metrics.inc_hits() + return e.value + + def get_with_expiry(self, key): + """Get a value, and its expiry time, from the cache + + Args: + key: key to look up + + Returns: + Tuple[Any, float]: the value from the cache, and the expiry time + + Raises: + KeyError if the entry is not found + """ + self.expire() + try: + e = self._data[key] + except KeyError: + self._metrics.inc_misses() + raise + self._metrics.inc_hits() + return e.value, e.expiry_time + + def pop(self, key, default=SENTINEL): + """Remove a value from the cache + + If key is in the cache, remove it and return its value, else return default. + If default is not given and key is not in the cache, a KeyError is raised. + + Args: + key: key to look up + default: default value to return, if key is not found. If not set, and the + key is not found, a KeyError will be raised + + Returns: + value from the cache, or the default + """ + self.expire() + e = self._data.pop(key, SENTINEL) + if e == SENTINEL: + self._metrics.inc_misses() + if default == SENTINEL: + raise KeyError(key) + return default + self._expiry_list.remove(e) + self._metrics.inc_hits() + return e.value + + def __getitem__(self, key): + return self.get(key) + + def __delitem__(self, key): + self.pop(key) + + def __contains__(self, key): + return key in self._data + + def __len__(self): + self.expire() + return len(self._data) + + def expire(self): + """Run the expiry on the cache. 
Any entries whose expiry times are due will + be removed + """ + now = self._timer() + while self._expiry_list: + first_entry = self._expiry_list[0] + if first_entry.expiry_time - now > 0.0: + break + del self._data[first_entry.key] + del self._expiry_list[0] + + +@attr.s(frozen=True, slots=True) +class _CacheEntry(object): + """TTLCache entry""" + # expiry_time is the first attribute, so that entries are sorted by expiry. + expiry_time = attr.ib() + key = attr.ib() + value = attr.ib() diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 4c6e92beb8..311b49e18a 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -285,7 +285,10 @@ class LoggingContext(object): self.alive = False # if we have a parent, pass our CPU usage stats on - if self.parent_context is not None: + if ( + self.parent_context is not None + and hasattr(self.parent_context, '_resource_usage') + ): self.parent_context._resource_usage += self._resource_usage # reset them in case we get entered again diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 6f318c6a29..fdcb375f95 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -16,7 +16,8 @@ import random import string -from six import PY3 +import six +from six import PY2, PY3 from six.moves import range _string_with_symbols = ( @@ -71,3 +72,39 @@ def to_ascii(s): return s.encode("ascii") except UnicodeEncodeError: return s + + +def exception_to_unicode(e): + """Helper function to extract the text of an exception as a unicode string + + Args: + e (Exception): exception to be stringified + + Returns: + unicode + """ + # urgh, this is a mess. The basic problem here is that psycopg2 constructs its + # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will + # then produce the raw byte sequence. 
Under Python 2, this will then cause another + # error if it gets mixed with a `unicode` object, as per + # https://github.com/matrix-org/synapse/issues/4252 + + # First of all, if we're under python3, everything is fine because it will sort this + # nonsense out for us. + if not PY2: + return str(e) + + # otherwise let's have a stab at decoding the exception message. We'll circumvent + # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii') + # and instead look at what is in the args member. + + if len(e.args) == 0: + return u"" + elif len(e.args) > 1: + return six.text_type(repr(e.args)) + + msg = e.args[0] + if isinstance(msg, bytes): + return msg.decode('utf-8', errors='replace') + else: + return msg |