diff options
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/api/errors.py | 6 | ||||
-rw-r--r-- | synapse/app/generic_worker.py | 59 | ||||
-rw-r--r-- | synapse/appservice/api.py | 20 | ||||
-rw-r--r-- | synapse/config/federation.py | 129 | ||||
-rw-r--r-- | synapse/config/homeserver.py | 3 | ||||
-rw-r--r-- | synapse/config/server.py | 66 | ||||
-rw-r--r-- | synapse/event_auth.py | 10 | ||||
-rw-r--r-- | synapse/federation/federation_server.py | 6 | ||||
-rw-r--r-- | synapse/federation/send_queue.py | 14 | ||||
-rw-r--r-- | synapse/federation/sender/__init__.py | 48 | ||||
-rw-r--r-- | synapse/federation/sender/per_destination_queue.py | 22 | ||||
-rw-r--r-- | synapse/handlers/cas_handler.py | 2 | ||||
-rw-r--r-- | synapse/handlers/federation.py | 76 | ||||
-rw-r--r-- | synapse/http/client.py | 14 | ||||
-rw-r--r-- | synapse/http/server.py | 71 | ||||
-rw-r--r-- | synapse/http/servlet.py | 14 | ||||
-rw-r--r-- | synapse/replication/tcp/commands.py | 10 | ||||
-rw-r--r-- | synapse/replication/tcp/handler.py | 4 | ||||
-rw-r--r-- | synapse/storage/data_stores/main/schema/delta/58/10federation_pos_instance_name.sql | 22 | ||||
-rw-r--r-- | synapse/storage/data_stores/main/stream.py | 97 |
20 files changed, 483 insertions, 210 deletions
diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 5305038c21..d5d4522336 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -15,13 +15,11 @@ # limitations under the License. """Contains exceptions and error codes.""" - +import json import logging from http import HTTPStatus from typing import Dict, List -from canonicaljson import json - from twisted.web import http logger = logging.getLogger(__name__) @@ -573,7 +571,7 @@ class HttpResponseException(CodeMessageException): # try to parse the body as json, to get better errcode/msg, but # default to M_UNKNOWN with the HTTP status as the error text try: - j = json.loads(self.response) + j = json.loads(self.response.decode("utf-8")) except ValueError: j = {} diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index f6792d9fc8..e90695f026 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -511,25 +511,7 @@ class GenericWorkerSlavedStore( SearchWorkerStore, BaseSlavedStore, ): - def __init__(self, database, db_conn, hs): - super(GenericWorkerSlavedStore, self).__init__(database, db_conn, hs) - - # We pull out the current federation stream position now so that we - # always have a known value for the federation position in memory so - # that we don't have to bounce via a deferred once when we start the - # replication streams. - self.federation_out_pos_startup = self._get_federation_out_pos(db_conn) - - def _get_federation_out_pos(self, db_conn): - sql = "SELECT stream_id FROM federation_stream_position WHERE type = ?" - sql = self.database_engine.convert_param_style(sql) - - txn = db_conn.cursor() - txn.execute(sql, ("federation",)) - rows = txn.fetchall() - txn.close() - - return rows[0][0] if rows else -1 + pass class GenericWorkerServer(HomeServer): @@ -812,19 +794,11 @@ class FederationSenderHandler(object): self.federation_sender = hs.get_federation_sender() self._hs = hs - # if the worker is restarted, we want to pick up where we left off in - # the replication stream, so load the position from the database. - # - # XXX is this actually worthwhile? Whenever the master is restarted, we'll - # drop some rows anyway (which is mostly fine because we're only dropping - # typing and presence notifications). If the replication stream is - # unreliable, why do we do all this hoop-jumping to store the position in the - # database? See also https://github.com/matrix-org/synapse/issues/7535. - # - self.federation_position = self.store.federation_out_pos_startup + # Stores the latest position in the federation stream we've gotten up + # to. This is always set before we use it. + self.federation_position = None self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer") - self._last_ack = self.federation_position def on_start(self): # There may be some events that are persisted but haven't been sent, @@ -932,7 +906,6 @@ class FederationSenderHandler(object): # We ACK this token over replication so that the master can drop # its in memory queues self._hs.get_tcp_replication().send_federation_ack(current_position) - self._last_ack = current_position except Exception: logger.exception("Error updating federation stream position") @@ -960,7 +933,7 @@ def start(config_options): ) if config.worker_app == "synapse.app.appservice": - if config.notify_appservices: + if config.appservice.notify_appservices: sys.stderr.write( "\nThe appservices must be disabled in the main synapse process" "\nbefore they can be run in a separate worker." @@ -970,13 +943,13 @@ def start(config_options): sys.exit(1) # Force the appservice to start since they will be disabled in the main config - config.notify_appservices = True + config.appservice.notify_appservices = True else: # For other worker types we force this to off. - config.notify_appservices = False + config.appservice.notify_appservices = False if config.worker_app == "synapse.app.pusher": - if config.start_pushers: + if config.server.start_pushers: sys.stderr.write( "\nThe pushers must be disabled in the main synapse process" "\nbefore they can be run in a separate worker." @@ -986,13 +959,13 @@ def start(config_options): sys.exit(1) # Force the pushers to start since they will be disabled in the main config - config.start_pushers = True + config.server.start_pushers = True else: # For other worker types we force this to off. - config.start_pushers = False + config.server.start_pushers = False if config.worker_app == "synapse.app.user_dir": - if config.update_user_directory: + if config.server.update_user_directory: sys.stderr.write( "\nThe update_user_directory must be disabled in the main synapse process" "\nbefore they can be run in a separate worker." @@ -1002,13 +975,13 @@ def start(config_options): sys.exit(1) # Force the pushers to start since they will be disabled in the main config - config.update_user_directory = True + config.server.update_user_directory = True else: # For other worker types we force this to off. - config.update_user_directory = False + config.server.update_user_directory = False if config.worker_app == "synapse.app.federation_sender": - if config.send_federation: + if config.federation.send_federation: sys.stderr.write( "\nThe send_federation must be disabled in the main synapse process" "\nbefore they can be run in a separate worker." @@ -1018,10 +991,10 @@ def start(config_options): sys.exit(1) # Force the pushers to start since they will be disabled in the main config - config.send_federation = True + config.federation.send_federation = True else: # For other worker types we force this to off. - config.send_federation = False + config.federation.send_federation = False synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index f92bfb420b..1e0e4d497d 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -19,7 +19,7 @@ from prometheus_client import Counter from twisted.internet import defer -from synapse.api.constants import ThirdPartyEntityKind +from synapse.api.constants import EventTypes, ThirdPartyEntityKind from synapse.api.errors import CodeMessageException from synapse.events.utils import serialize_event from synapse.http.client import SimpleHttpClient @@ -207,7 +207,7 @@ class ApplicationServiceApi(SimpleHttpClient): if service.url is None: return True - events = self._serialize(events) + events = self._serialize(service, events) if txn_id is None: logger.warning( @@ -233,6 +233,18 @@ class ApplicationServiceApi(SimpleHttpClient): failed_transactions_counter.labels(service.id).inc() return False - def _serialize(self, events): + def _serialize(self, service, events): time_now = self.clock.time_msec() - return [serialize_event(e, time_now, as_client_event=True) for e in events] + return [ + serialize_event( + e, + time_now, + as_client_event=True, + is_invite=( + e.type == EventTypes.Member + and e.membership == "invite" + and service.is_interested_in_user(e.state_key) + ), + ) + for e in events + ] diff --git a/synapse/config/federation.py b/synapse/config/federation.py new file mode 100644 index 0000000000..7782ab4c9d --- /dev/null +++ b/synapse/config/federation.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from hashlib import sha256 +from typing import List, Optional + +import attr +from netaddr import IPSet + +from ._base import Config, ConfigError + + +@attr.s +class ShardedFederationSendingConfig: + """Algorithm for choosing which federation sender instance is responsible + for which destionation host. + """ + + instances = attr.ib(type=List[str]) + + def should_send_to(self, instance_name: str, destination: str) -> bool: + """Whether this instance is responsible for sending transcations for + the given host. + """ + + # If multiple federation senders are not defined we always return true. + if not self.instances or len(self.instances) == 1: + return True + + # We shard by taking the hash, modulo it by the number of federation + # senders and then checking whether this instance matches the instance + # at that index. + # + # (Technically this introduces some bias and is not entirely uniform, but + # since the hash is so large the bias is ridiculously small). + dest_hash = sha256(destination.encode("utf8")).digest() + dest_int = int.from_bytes(dest_hash, byteorder="little") + remainder = dest_int % (len(self.instances)) + return self.instances[remainder] == instance_name + + +class FederationConfig(Config): + section = "federation" + + def read_config(self, config, **kwargs): + # Whether to send federation traffic out in this process. This only + # applies to some federation traffic, and so shouldn't be used to + # "disable" federation + self.send_federation = config.get("send_federation", True) + + federation_sender_instances = config.get("federation_sender_instances") or [] + self.federation_shard_config = ShardedFederationSendingConfig( + federation_sender_instances + ) + + # FIXME: federation_domain_whitelist needs sytests + self.federation_domain_whitelist = None # type: Optional[dict] + federation_domain_whitelist = config.get("federation_domain_whitelist", None) + + if federation_domain_whitelist is not None: + # turn the whitelist into a hash for speed of lookup + self.federation_domain_whitelist = {} + + for domain in federation_domain_whitelist: + self.federation_domain_whitelist[domain] = True + + self.federation_ip_range_blacklist = config.get( + "federation_ip_range_blacklist", [] + ) + + # Attempt to create an IPSet from the given ranges + try: + self.federation_ip_range_blacklist = IPSet( + self.federation_ip_range_blacklist + ) + + # Always blacklist 0.0.0.0, :: + self.federation_ip_range_blacklist.update(["0.0.0.0", "::"]) + except Exception as e: + raise ConfigError( + "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e + ) + + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + # Restrict federation to the following whitelist of domains. + # N.B. we recommend also firewalling your federation listener to limit + # inbound federation traffic as early as possible, rather than relying + # purely on this application-layer restriction. If not specified, the + # default is to whitelist everything. + # + #federation_domain_whitelist: + # - lon.example.com + # - nyc.example.com + # - syd.example.com + + # Prevent federation requests from being sent to the following + # blacklist IP address CIDR ranges. If this option is not specified, or + # specified with an empty list, no ip range blacklist will be enforced. + # + # As of Synapse v1.4.0 this option also affects any outbound requests to identity + # servers provided by user input. + # + # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly + # listed here, since they correspond to unroutable addresses.) + # + federation_ip_range_blacklist: + - '127.0.0.0/8' + - '10.0.0.0/8' + - '172.16.0.0/12' + - '192.168.0.0/16' + - '100.64.0.0/10' + - '169.254.0.0/16' + - '::1/128' + - 'fe80::/64' + - 'fc00::/7' + """ diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 264c274c52..8e93d31394 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -23,6 +23,7 @@ from .cas import CasConfig from .consent_config import ConsentConfig from .database import DatabaseConfig from .emailconfig import EmailConfig +from .federation import FederationConfig from .groups import GroupsConfig from .jwt_config import JWTConfig from .key import KeyConfig @@ -57,6 +58,7 @@ class HomeServerConfig(RootConfig): config_classes = [ ServerConfig, TlsConfig, + FederationConfig, CacheConfig, DatabaseConfig, LoggingConfig, @@ -90,4 +92,5 @@ class HomeServerConfig(RootConfig): ThirdPartyRulesConfig, TracerConfig, RedisConfig, + FederationConfig, ] diff --git a/synapse/config/server.py b/synapse/config/server.py index 8204664883..b6afa642ca 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -23,7 +23,6 @@ from typing import Any, Dict, Iterable, List, Optional import attr import yaml -from netaddr import IPSet from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.http.endpoint import parse_and_validate_server_name @@ -136,11 +135,6 @@ class ServerConfig(Config): self.use_frozen_dicts = config.get("use_frozen_dicts", False) self.public_baseurl = config.get("public_baseurl") - # Whether to send federation traffic out in this process. This only - # applies to some federation traffic, and so shouldn't be used to - # "disable" federation - self.send_federation = config.get("send_federation", True) - # Whether to enable user presence. self.use_presence = config.get("use_presence", True) @@ -263,34 +257,6 @@ class ServerConfig(Config): # due to resource constraints self.admin_contact = config.get("admin_contact", None) - # FIXME: federation_domain_whitelist needs sytests - self.federation_domain_whitelist = None # type: Optional[dict] - federation_domain_whitelist = config.get("federation_domain_whitelist", None) - - if federation_domain_whitelist is not None: - # turn the whitelist into a hash for speed of lookup - self.federation_domain_whitelist = {} - - for domain in federation_domain_whitelist: - self.federation_domain_whitelist[domain] = True - - self.federation_ip_range_blacklist = config.get( - "federation_ip_range_blacklist", [] - ) - - # Attempt to create an IPSet from the given ranges - try: - self.federation_ip_range_blacklist = IPSet( - self.federation_ip_range_blacklist - ) - - # Always blacklist 0.0.0.0, :: - self.federation_ip_range_blacklist.update(["0.0.0.0", "::"]) - except Exception as e: - raise ConfigError( - "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e - ) - if self.public_baseurl is not None: if self.public_baseurl[-1] != "/": self.public_baseurl += "/" @@ -743,38 +709,6 @@ class ServerConfig(Config): # #enable_search: false - # Restrict federation to the following whitelist of domains. - # N.B. we recommend also firewalling your federation listener to limit - # inbound federation traffic as early as possible, rather than relying - # purely on this application-layer restriction. If not specified, the - # default is to whitelist everything. - # - #federation_domain_whitelist: - # - lon.example.com - # - nyc.example.com - # - syd.example.com - - # Prevent federation requests from being sent to the following - # blacklist IP address CIDR ranges. If this option is not specified, or - # specified with an empty list, no ip range blacklist will be enforced. - # - # As of Synapse v1.4.0 this option also affects any outbound requests to identity - # servers provided by user input. - # - # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly - # listed here, since they correspond to unroutable addresses.) - # - federation_ip_range_blacklist: - - '127.0.0.0/8' - - '10.0.0.0/8' - - '172.16.0.0/12' - - '192.168.0.0/16' - - '100.64.0.0/10' - - '169.254.0.0/16' - - '::1/128' - - 'fe80::/64' - - 'fc00::/7' - # List of ports that Synapse should listen on, their purpose and their # configuration. # diff --git a/synapse/event_auth.py b/synapse/event_auth.py index c582355146..c0981eee62 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -65,14 +65,16 @@ def check( room_id = event.room_id - # I'm not really expecting to get auth events in the wrong room, but let's - # sanity-check it + # We need to ensure that the auth events are actually for the same room, to + # stop people from using powers they've been granted in other rooms for + # example. for auth_event in auth_events.values(): if auth_event.room_id != room_id: - raise Exception( + raise AuthError( + 403, "During auth for event %s in room %s, found event %s in the state " "which is in room %s" - % (event.event_id, room_id, auth_event.event_id, auth_event.room_id) + % (event.event_id, room_id, auth_event.event_id, auth_event.room_id), ) if do_sig_check: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 86051decd4..2aab9c5f55 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -14,10 +14,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import json import logging from typing import Any, Callable, Dict, List, Match, Optional, Tuple, Union -from canonicaljson import json from prometheus_client import Counter, Histogram from twisted.internet import defer @@ -526,9 +526,9 @@ class FederationServer(FederationBase): json_result = {} # type: Dict[str, Dict[str, dict]] for user_id, device_keys in results.items(): for device_id, keys in device_keys.items(): - for key_id, json_bytes in keys.items(): + for key_id, json_str in keys.items(): json_result.setdefault(user_id, {})[device_id] = { - key_id: json.loads(json_bytes) + key_id: json.loads(json_str) } logger.info( diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 860b03f7b9..4fc9ff92e5 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -55,6 +55,11 @@ class FederationRemoteSendQueue(object): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id + # We may have multiple federation sender instances, so we need to track + # their positions separately. + self._sender_instances = hs.config.federation.federation_shard_config.instances + self._sender_positions = {} + # Pending presence map user_id -> UserPresenceState self.presence_map = {} # type: Dict[str, UserPresenceState] @@ -261,7 +266,14 @@ class FederationRemoteSendQueue(object): def get_current_token(self): return self.pos - 1 - def federation_ack(self, token): + def federation_ack(self, instance_name, token): + if self._sender_instances: + # If we have configured multiple federation sender instances we need + # to track their positions separately, and only clear the queue up + # to the token all instances have acked. + self._sender_positions[instance_name] = token + token = min(self._sender_positions.values()) + self._clear_queue_before_pos(token) async def get_replication_rows( diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 464d7a41de..4b63a0755f 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -69,6 +69,9 @@ class FederationSender(object): self._transaction_manager = TransactionManager(hs) + self._instance_name = hs.get_instance_name() + self._federation_shard_config = hs.config.federation.federation_shard_config + # map from destination to PerDestinationQueue self._per_destination_queues = {} # type: Dict[str, PerDestinationQueue] @@ -191,7 +194,13 @@ class FederationSender(object): ) return - destinations = set(destinations) + destinations = { + d + for d in destinations + if self._federation_shard_config.should_send_to( + self._instance_name, d + ) + } if send_on_behalf_of is not None: # If we are sending the event on behalf of another server @@ -322,7 +331,12 @@ class FederationSender(object): # Work out which remote servers should be poked and poke them. domains = yield self.state.get_current_hosts_in_room(room_id) - domains = [d for d in domains if d != self.server_name] + domains = [ + d + for d in domains + if d != self.server_name + and self._federation_shard_config.should_send_to(self._instance_name, d) + ] if not domains: return @@ -427,6 +441,10 @@ class FederationSender(object): for destination in destinations: if destination == self.server_name: continue + if not self._federation_shard_config.should_send_to( + self._instance_name, destination + ): + continue self._get_per_destination_queue(destination).send_presence(states) @measure_func("txnqueue._process_presence") @@ -441,6 +459,12 @@ class FederationSender(object): for destination in destinations: if destination == self.server_name: continue + + if not self._federation_shard_config.should_send_to( + self._instance_name, destination + ): + continue + self._get_per_destination_queue(destination).send_presence(states) def build_and_send_edu( @@ -462,6 +486,11 @@ class FederationSender(object): logger.info("Not sending EDU to ourselves") return + if not self._federation_shard_config.should_send_to( + self._instance_name, destination + ): + return + edu = Edu( origin=self.server_name, destination=destination, @@ -478,6 +507,11 @@ class FederationSender(object): edu: edu to send key: clobbering key for this edu """ + if not self._federation_shard_config.should_send_to( + self._instance_name, edu.destination + ): + return + queue = self._get_per_destination_queue(edu.destination) if key: queue.send_keyed_edu(edu, key) @@ -489,6 +523,11 @@ class FederationSender(object): logger.warning("Not sending device update to ourselves") return + if not self._federation_shard_config.should_send_to( + self._instance_name, destination + ): + return + self._get_per_destination_queue(destination).attempt_new_transaction() def wake_destination(self, destination: str): @@ -502,6 +541,11 @@ class FederationSender(object): logger.warning("Not waking up ourselves") return + if not self._federation_shard_config.should_send_to( + self._instance_name, destination + ): + return + self._get_per_destination_queue(destination).attempt_new_transaction() @staticmethod diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 12966e239b..6402136e8a 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -74,6 +74,20 @@ class PerDestinationQueue(object): self._clock = hs.get_clock() self._store = hs.get_datastore() self._transaction_manager = transaction_manager + self._instance_name = hs.get_instance_name() + self._federation_shard_config = hs.config.federation.federation_shard_config + + self._should_send_on_this_instance = True + if not self._federation_shard_config.should_send_to( + self._instance_name, destination + ): + # We don't raise an exception here to avoid taking out any other + # processing. We have a guard in `attempt_new_transaction` that + # ensure we don't start sending stuff. + logger.error( + "Create a per destination queue for %s on wrong worker", destination, + ) + self._should_send_on_this_instance = False self._destination = destination self.transmission_loop_running = False @@ -180,6 +194,14 @@ class PerDestinationQueue(object): logger.debug("TX [%s] Transaction already in progress", self._destination) return + if not self._should_send_on_this_instance: + # We don't raise an exception here to avoid taking out any other + # processing. + logger.error( + "Trying to start a transaction to %s on wrong worker", self._destination + ) + return + logger.debug("TX [%s] Starting transaction loop", self._destination) run_as_background_process( diff --git a/synapse/handlers/cas_handler.py b/synapse/handlers/cas_handler.py index d79ffefdb5..786e608fa2 100644 --- a/synapse/handlers/cas_handler.py +++ b/synapse/handlers/cas_handler.py @@ -104,7 +104,7 @@ class CasHandler: return user, displayname def _parse_cas_response( - self, cas_response_body: str + self, cas_response_body: bytes ) -> Tuple[str, Dict[str, Optional[str]]]: """ Retrieve the user and other parameters from the CAS response. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ca7da42a3f..e43bccd721 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -61,6 +61,7 @@ from synapse.logging.context import ( run_in_background, ) from synapse.logging.utils import log_function +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -618,6 +619,11 @@ class FederationHandler(BaseHandler): will be omitted from the result. Likewise, any events which turn out not to be in the given room. + This function *does not* automatically get missing auth events of the + newly fetched events. Callers must include the full auth chain of + of the missing events in the `event_ids` argument, to ensure that any + missing auth events are correctly fetched. + Returns: map from event_id to event """ @@ -784,15 +790,25 @@ class FederationHandler(BaseHandler): resync = True if resync: - await self.store.mark_remote_user_device_cache_as_stale(event.sender) + run_as_background_process( + "resync_device_due_to_pdu", self._resync_device, event.sender + ) - # Immediately attempt a resync in the background - if self.config.worker_app: - return run_in_background(self._user_device_resync, event.sender) - else: - return run_in_background( - self._device_list_updater.user_device_resync, event.sender - ) + async def _resync_device(self, sender: str) -> None: + """We have detected that the device list for the given user may be out + of sync, so we try and resync them. + """ + + try: + await self.store.mark_remote_user_device_cache_as_stale(sender) + + # Immediately attempt a resync in the background + if self.config.worker_app: + await self._user_device_resync(user_id=sender) + else: + await self._device_list_updater.user_device_resync(sender) + except Exception: + logger.exception("Failed to resync device for %s", sender) @log_function async def backfill(self, dest, room_id, limit, extremities): @@ -1131,12 +1147,16 @@ class FederationHandler(BaseHandler): ): """Fetch the given events from a server, and persist them as outliers. + This function *does not* recursively get missing auth events of the + newly fetched events. Callers must include in the `events` argument + any missing events from the auth chain. + Logs a warning if we can't find the given event. """ room_version = await self.store.get_room_version(room_id) - event_infos = [] + event_map = {} # type: Dict[str, EventBase] async def get_event(event_id: str): with nested_logging_context(event_id): @@ -1150,17 +1170,7 @@ class FederationHandler(BaseHandler): ) return - # recursively fetch the auth events for this event - auth_events = await self._get_events_from_store_or_dest( - destination, room_id, event.auth_event_ids() - ) - auth = {} - for auth_event_id in event.auth_event_ids(): - ae = auth_events.get(auth_event_id) - if ae: - auth[(ae.type, ae.state_key)] = ae - - event_infos.append(_NewEventInfo(event, None, auth)) + event_map[event.event_id] = event except Exception as e: logger.warning( @@ -1172,6 +1182,32 @@ class FederationHandler(BaseHandler): await concurrently_execute(get_event, events, 5) + # Make a map of auth events for each event. We do this after fetching + # all the events as some of the events' auth events will be in the list + # of requested events. + + auth_events = [ + aid + for event in event_map.values() + for aid in event.auth_event_ids() + if aid not in event_map + ] + persisted_events = await self.store.get_events( + auth_events, allow_rejected=True, + ) + + event_infos = [] + for event in event_map.values(): + auth = {} + for auth_event_id in event.auth_event_ids(): + ae = persisted_events.get(auth_event_id) or event_map.get(auth_event_id) + if ae: + auth[(ae.type, ae.state_key)] = ae + else: + logger.info("Missing auth event %s", auth_event_id) + + event_infos.append(_NewEventInfo(event, None, auth)) + await self._handle_new_events( destination, event_infos, ) diff --git a/synapse/http/client.py b/synapse/http/client.py index 8743e9839d..505872ee90 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -13,13 +13,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import json import logging import urllib from io import BytesIO import treq -from canonicaljson import encode_canonical_json, json +from canonicaljson import encode_canonical_json from netaddr import IPAddress from prometheus_client import Counter from zope.interface import implementer, provider @@ -371,7 +371,7 @@ class SimpleHttpClient(object): body = yield make_deferred_yieldable(readBody(response)) if 200 <= response.code < 300: - return json.loads(body) + return json.loads(body.decode("utf-8")) else: raise HttpResponseException(response.code, response.phrase, body) @@ -412,7 +412,7 @@ class SimpleHttpClient(object): body = yield make_deferred_yieldable(readBody(response)) if 200 <= response.code < 300: - return json.loads(body) + return json.loads(body.decode("utf-8")) else: raise HttpResponseException(response.code, response.phrase, body) @@ -441,7 +441,7 @@ class SimpleHttpClient(object): actual_headers.update(headers) body = yield self.get_raw(uri, args, headers=headers) - return json.loads(body) + return json.loads(body.decode("utf-8")) @defer.inlineCallbacks def put_json(self, uri, json_body, args={}, headers=None): @@ -485,7 +485,7 @@ class SimpleHttpClient(object): body = yield make_deferred_yieldable(readBody(response)) if 200 <= response.code < 300: - return json.loads(body) + return json.loads(body.decode("utf-8")) else: raise HttpResponseException(response.code, response.phrase, body) @@ -503,7 +503,7 @@ class SimpleHttpClient(object): header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the - HTTP body at text. + HTTP body as bytes. Raises: HttpResponseException on a non-2xx HTTP response. """ diff --git a/synapse/http/server.py b/synapse/http/server.py index 2b35f86066..cff49202f4 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -217,7 +217,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta): return NOT_DONE_YET @wrap_async_request_handler - async def _async_render_wrapper(self, request): + async def _async_render_wrapper(self, request: SynapseRequest): """This is a wrapper that delegates to `_async_render` and handles exceptions, return values, metrics, etc. """ @@ -237,7 +237,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta): f = failure.Failure() self._send_error_response(f, request) - async def _async_render(self, request): + async def _async_render(self, request: Request): """Delegates to `_async_render_<METHOD>` methods, or returns a 400 if no appropriate method exists. Can be overriden in sub classes for different routing. @@ -278,7 +278,7 @@ class DirectServeJsonResource(_AsyncResource): """ def _send_response( - self, request, code, response_object, + self, request: Request, code: int, response_object: Any, ): """Implements _AsyncResource._send_response """ @@ -507,14 +507,29 @@ class RootOptionsRedirectResource(OptionsResource, RootRedirect): def respond_with_json( - request, - code, - json_object, - send_cors=False, - response_code_message=None, - pretty_print=False, - canonical_json=True, + request: Request, + code: int, + json_object: Any, + send_cors: bool = False, + pretty_print: bool = False, + canonical_json: bool = True, ): + """Sends encoded JSON in response to the given request. + + Args: + request: The http request to respond to. + code: The HTTP response code. + json_object: The object to serialize to JSON. + send_cors: Whether to send Cross-Origin Resource Sharing headers + https://fetch.spec.whatwg.org/#http-cors-protocol + pretty_print: Whether to include indentation and line-breaks in the + resulting JSON bytes. + canonical_json: Whether to use the canonicaljson algorithm when encoding + the JSON bytes. + + Returns: + twisted.web.server.NOT_DONE_YET if the request is still active. + """ # could alternatively use request.notifyFinish() and flip a flag when # the Deferred fires, but since the flag is RIGHT THERE it seems like # a waste. @@ -522,7 +537,7 @@ def respond_with_json( logger.warning( "Not sending response to request %s, already disconnected.", request ) - return + return None if pretty_print: json_bytes = encode_pretty_printed_json(json_object) + b"\n" @@ -533,30 +548,26 @@ def respond_with_json( else: json_bytes = json.dumps(json_object).encode("utf-8") - return respond_with_json_bytes( - request, - code, - json_bytes, - send_cors=send_cors, - response_code_message=response_code_message, - ) + return respond_with_json_bytes(request, code, json_bytes, send_cors=send_cors) def respond_with_json_bytes( - request, code, json_bytes, send_cors=False, response_code_message=None + request: Request, code: int, json_bytes: bytes, send_cors: bool = False, ): """Sends encoded JSON in response to the given request. Args: - request (twisted.web.http.Request): The http request to respond to. - code (int): The HTTP response code. - json_bytes (bytes): The json bytes to use as the response body. - send_cors (bool): Whether to send Cross-Origin Resource Sharing headers + request: The http request to respond to. + code: The HTTP response code. + json_bytes: The json bytes to use as the response body. + send_cors: Whether to send Cross-Origin Resource Sharing headers https://fetch.spec.whatwg.org/#http-cors-protocol + Returns: - twisted.web.server.NOT_DONE_YET""" + twisted.web.server.NOT_DONE_YET if the request is still active. + """ - request.setResponseCode(code, message=response_code_message) + request.setResponseCode(code) request.setHeader(b"Content-Type", b"application/json") request.setHeader(b"Content-Length", b"%d" % (len(json_bytes),)) request.setHeader(b"Cache-Control", b"no-cache, no-store, must-revalidate") @@ -573,12 +584,12 @@ def respond_with_json_bytes( return NOT_DONE_YET -def set_cors_headers(request): - """Set the CORs headers so that javascript running in a web browsers can +def set_cors_headers(request: Request): + """Set the CORS headers so that javascript running in a web browsers can use this API Args: - request (twisted.web.http.Request): The http request to add CORs to. + request: The http request to add CORS to. """ request.setHeader(b"Access-Control-Allow-Origin", b"*") request.setHeader( @@ -643,7 +654,7 @@ def set_clickjacking_protection_headers(request: Request): request.setHeader(b"Content-Security-Policy", b"frame-ancestors 'none';") -def finish_request(request): +def finish_request(request: Request): """ Finish writing the response to the request. Twisted throws a RuntimeException if the connection closed before the @@ -662,7 +673,7 @@ def finish_request(request): logger.info("Connection disconnected before response was written: %r", e) -def _request_user_agent_is_curl(request): +def _request_user_agent_is_curl(request: Request) -> bool: user_agents = request.requestHeaders.getRawHeaders(b"User-Agent", default=[]) for user_agent in user_agents: if b"curl" in user_agent: diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 13fcb408a6..3cabe9d02e 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -14,11 +14,9 @@ # limitations under the License. """ This module contains base REST classes for constructing REST servlets. """ - +import json import logging -from canonicaljson import json - from synapse.api.errors import Codes, SynapseError logger = logging.getLogger(__name__) @@ -214,16 +212,8 @@ def parse_json_value_from_request(request, allow_empty_body=False): if not content_bytes and allow_empty_body: return None - # Decode to Unicode so that simplejson will return Unicode strings on - # Python 2 - try: - content_unicode = content_bytes.decode("utf8") - except UnicodeDecodeError: - logger.warning("Unable to decode UTF-8") - raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON) - try: - content = json.loads(content_unicode) + content = json.loads(content_bytes.decode("utf-8")) except Exception as e: logger.warning("Unable to parse JSON: %s", e) raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index ccc7f1f0d1..f33801f883 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -293,20 +293,22 @@ class FederationAckCommand(Command): Format:: - FEDERATION_ACK <token> + FEDERATION_ACK <instance_name> <token> """ NAME = "FEDERATION_ACK" - def __init__(self, token): + def __init__(self, instance_name, token): + self.instance_name = instance_name self.token = token @classmethod def from_line(cls, line): - return cls(int(line)) + instance_name, token = line.split(" ") + return cls(instance_name, int(token)) def to_line(self): - return str(self.token) + return "%s %s" % (self.instance_name, self.token) class RemovePusherCommand(Command): diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 55b3b79008..80f5df60f9 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -238,7 +238,7 @@ class ReplicationCommandHandler: federation_ack_counter.inc() if self._federation_sender: - self._federation_sender.federation_ack(cmd.token) + self._federation_sender.federation_ack(cmd.instance_name, cmd.token) async def on_REMOVE_PUSHER( self, conn: AbstractConnection, cmd: RemovePusherCommand @@ -527,7 +527,7 @@ class ReplicationCommandHandler: """Ack data for the federation stream. This allows the master to drop data stored purely in memory. """ - self.send_command(FederationAckCommand(token)) + self.send_command(FederationAckCommand(self._instance_name, token)) def send_user_sync( self, instance_id: str, user_id: str, is_syncing: bool, last_sync_ms: int diff --git a/synapse/storage/data_stores/main/schema/delta/58/10federation_pos_instance_name.sql b/synapse/storage/data_stores/main/schema/delta/58/10federation_pos_instance_name.sql new file mode 100644 index 0000000000..1cc2633aad --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/58/10federation_pos_instance_name.sql @@ -0,0 +1,22 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We need to store the stream positions by instance in a sharded config world. +-- +-- We default to master as we want the column to be NOT NULL and we correctly +-- reset the instance name to match the config each time we start up. +ALTER TABLE federation_stream_position ADD COLUMN instance_name TEXT NOT NULL DEFAULT 'master'; + +CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position(type, instance_name); diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 379d758b5d..5e32c7aa1e 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -45,7 +45,7 @@ from twisted.internet import defer from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.storage._base import SQLBaseStore from synapse.storage.data_stores.main.events_worker import EventsWorkerStore -from synapse.storage.database import Database +from synapse.storage.database import Database, make_in_list_sql_clause from synapse.storage.engines import PostgresEngine from synapse.types import RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -253,6 +253,16 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): def __init__(self, database: Database, db_conn, hs): super(StreamWorkerStore, self).__init__(database, db_conn, hs) + self._instance_name = hs.get_instance_name() + self._send_federation = hs.should_send_federation() + self._federation_shard_config = hs.config.federation.federation_shard_config + + # If we're a process that sends federation we may need to reset the + # `federation_stream_position` table to match the current sharding + # config. We don't do this now as otherwise two processes could conflict + # during startup which would cause one to die. + self._need_to_reset_federation_stream_positions = self._send_federation + events_max = self.get_room_max_stream_ordering() event_cache_prefill, min_event_val = self.db.get_cache_dict( db_conn, @@ -793,22 +803,95 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return upper_bound, events - def get_federation_out_pos(self, typ): - return self.db.simple_select_one_onecol( + async def get_federation_out_pos(self, typ: str) -> int: + if self._need_to_reset_federation_stream_positions: + await self.db.runInteraction( + "_reset_federation_positions_txn", self._reset_federation_positions_txn + ) + self._need_to_reset_federation_stream_positions = False + + return await self.db.simple_select_one_onecol( table="federation_stream_position", retcol="stream_id", - keyvalues={"type": typ}, + keyvalues={"type": typ, "instance_name": self._instance_name}, desc="get_federation_out_pos", ) - def update_federation_out_pos(self, typ, stream_id): - return self.db.simple_update_one( + async def update_federation_out_pos(self, typ, stream_id): + if self._need_to_reset_federation_stream_positions: + await self.db.runInteraction( + "_reset_federation_positions_txn", self._reset_federation_positions_txn + ) + self._need_to_reset_federation_stream_positions = False + + return await self.db.simple_update_one( table="federation_stream_position", - keyvalues={"type": typ}, + keyvalues={"type": typ, "instance_name": self._instance_name}, updatevalues={"stream_id": stream_id}, desc="update_federation_out_pos", ) + def _reset_federation_positions_txn(self, txn): + """Fiddles with the `federation_stream_position` table to make it match + the configured federation sender instances during start up. + """ + + # The federation sender instances may have changed, so we need to + # massage the `federation_stream_position` table to have a row per type + # per instance sending federation. If there is a mismatch we update the + # table with the correct rows using the *minimum* stream ID seen. This + # may result in resending of events/EDUs to remote servers, but that is + # preferable to dropping them. + + if not self._send_federation: + return + + # Pull out the configured instances. If we don't have a shard config then + # we assume that we're the only instance sending. + configured_instances = self._federation_shard_config.instances + if not configured_instances: + configured_instances = [self._instance_name] + elif self._instance_name not in configured_instances: + return + + instances_in_table = self.db.simple_select_onecol_txn( + txn, + table="federation_stream_position", + keyvalues={}, + retcol="instance_name", + ) + + if set(instances_in_table) == set(configured_instances): + # Nothing to do + return + + sql = """ + SELECT type, MIN(stream_id) FROM federation_stream_position + GROUP BY type + """ + txn.execute(sql) + min_positions = dict(txn) # Map from type -> min position + + # Ensure we do actually have some values here + assert set(min_positions) == {"federation", "events"} + + sql = """ + DELETE FROM federation_stream_position + WHERE NOT (%s) + """ + clause, args = make_in_list_sql_clause( + txn.database_engine, "instance_name", configured_instances + ) + txn.execute(sql % (clause,), args) + + for typ, stream_id in min_positions.items(): + self.db.simple_upsert_txn( + txn, + table="federation_stream_position", + keyvalues={"type": typ, "instance_name": self._instance_name}, + values={"stream_id": stream_id}, + ) + def has_room_changed_since(self, room_id, stream_id): return self._events_stream_cache.has_entity_changed(room_id, stream_id) |