diff options
Diffstat (limited to 'synapse')
102 files changed, 4241 insertions, 2776 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py index 25c10244d3..6bb5a8b24d 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.2" +__version__ = "0.99.3" diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 5992d30623..e8112d5f05 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -621,13 +621,13 @@ class Auth(object): Returns: True if the the sender is allowed to redact the target event if the - target event was created by them. + target event was created by them. False if the sender is allowed to redact the target event with no - further checks. + further checks. Raises: AuthError if the event sender is definitely not allowed to redact - the target event. + the target event. """ return event_auth.check_redaction(room_version, event, auth_events) @@ -743,9 +743,9 @@ class Auth(object): Returns: Deferred[tuple[str, str|None]]: Resolves to the current membership of - the user in the room and the membership event ID of the user. If - the user is not in the room and never has been, then - `(Membership.JOIN, None)` is returned. + the user in the room and the membership event ID of the user. If + the user is not in the room and never has been, then + `(Membership.JOIN, None)` is returned. """ try: @@ -777,20 +777,22 @@ class Auth(object): Args: user_id(str|None): If present, checks for presence against existing - MAU cohort + MAU cohort threepid(dict|None): If present, checks for presence against configured - reserved threepid. Used in cases where the user is trying register - with a MAU blocked server, normally they would be rejected but their - threepid is on the reserved list. user_id and - threepid should never be set at the same time. + reserved threepid. Used in cases where the user is trying register + with a MAU blocked server, normally they would be rejected but their + threepid is on the reserved list. user_id and + threepid should never be set at the same time. """ # Never fail an auth check for the server notices users or support user # This can be a problem where event creation is prohibited due to blocking - is_support = yield self.store.is_support_user(user_id) - if user_id == self.hs.config.server_notices_mxid or is_support: - return + if user_id is not None: + if user_id == self.hs.config.server_notices_mxid: + return + if (yield self.store.is_support_user(user_id)): + return if self.hs.config.hs_disabled: raise ResourceLimitError( diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 3bb5b3da37..296c4a1c17 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -14,6 +14,8 @@ import collections +from synapse.api.errors import LimitExceededError + class Ratelimiter(object): """ @@ -23,12 +25,13 @@ class Ratelimiter(object): def __init__(self): self.message_counts = collections.OrderedDict() - def send_message(self, user_id, time_now_s, msg_rate_hz, burst_count, update=True): - """Can the user send a message? + def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True): + """Can the entity (e.g. user or IP address) perform the action? Args: - user_id: The user sending a message. + key: The key we should use when rate limiting. Can be a user ID + (when sending events), an IP address, etc. time_now_s: The time now. - msg_rate_hz: The long term number of messages a user can send in a + rate_hz: The long term number of messages a user can send in a second. burst_count: How many messages the user can send before being limited. @@ -41,10 +44,10 @@ class Ratelimiter(object): """ self.prune_message_counts(time_now_s) message_count, time_start, _ignored = self.message_counts.get( - user_id, (0., time_now_s, None), + key, (0., time_now_s, None), ) time_delta = time_now_s - time_start - sent_count = message_count - time_delta * msg_rate_hz + sent_count = message_count - time_delta * rate_hz if sent_count < 0: allowed = True time_start = time_now_s @@ -56,13 +59,13 @@ class Ratelimiter(object): message_count += 1 if update: - self.message_counts[user_id] = ( - message_count, time_start, msg_rate_hz + self.message_counts[key] = ( + message_count, time_start, rate_hz ) - if msg_rate_hz > 0: + if rate_hz > 0: time_allowed = ( - time_start + (message_count - burst_count + 1) / msg_rate_hz + time_start + (message_count - burst_count + 1) / rate_hz ) if time_allowed < time_now_s: time_allowed = time_now_s @@ -72,12 +75,22 @@ class Ratelimiter(object): return allowed, time_allowed def prune_message_counts(self, time_now_s): - for user_id in list(self.message_counts.keys()): - message_count, time_start, msg_rate_hz = ( - self.message_counts[user_id] + for key in list(self.message_counts.keys()): + message_count, time_start, rate_hz = ( + self.message_counts[key] ) time_delta = time_now_s - time_start - if message_count - time_delta * msg_rate_hz > 0: + if message_count - time_delta * rate_hz > 0: break else: - del self.message_counts[user_id] + del self.message_counts[key] + + def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True): + allowed, time_allowed = self.can_do_action( + key, time_now_s, rate_hz, burst_count, update + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now_s)), + ) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 32e8b8a3f5..d4c6c4c8e2 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -63,12 +63,13 @@ def start_worker_reactor(appname, config): start_reactor( appname, - config.soft_file_limit, - config.gc_thresholds, - config.worker_pid_file, - config.worker_daemonize, - config.worker_cpu_affinity, - logger, + soft_file_limit=config.soft_file_limit, + gc_thresholds=config.gc_thresholds, + pid_file=config.worker_pid_file, + daemonize=config.worker_daemonize, + cpu_affinity=config.worker_cpu_affinity, + print_pidfile=config.print_pidfile, + logger=logger, ) @@ -79,6 +80,7 @@ def start_reactor( pid_file, daemonize, cpu_affinity, + print_pidfile, logger, ): """ Run the reactor in the main process @@ -93,6 +95,7 @@ def start_reactor( pid_file (str): name of pid file to write to if daemonize is True daemonize (bool): true to run the reactor in a background process cpu_affinity (int|None): cpu affinity mask + print_pidfile (bool): whether to print the pid file, if daemonize is True logger (logging.Logger): logger instance to pass to Daemonize """ @@ -124,6 +127,9 @@ def start_reactor( reactor.run() if daemonize: + if print_pidfile: + print(pid_file) + daemon = Daemonize( app=appname, pid=pid_file, diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 043b48f8f3..beaea64a61 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -33,9 +33,13 @@ from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.keys import SlavedKeyStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore @@ -48,6 +52,8 @@ from synapse.rest.client.v1.room import ( RoomMemberListRestServlet, RoomStateRestServlet, ) +from synapse.rest.client.v2_alpha.account import ThreepidRestServlet +from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine @@ -60,6 +66,10 @@ logger = logging.getLogger("synapse.app.client_reader") class ClientReaderSlavedStore( + SlavedDeviceInboxStore, + SlavedDeviceStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedAccountDataStore, SlavedEventStore, SlavedKeyStore, @@ -96,6 +106,9 @@ class ClientReaderServer(HomeServer): RoomEventContextServlet(self).register(resource) RegisterRestServlet(self).register(resource) LoginRestServlet(self).register(resource) + ThreepidRestServlet(self).register(resource) + KeyQueryServlet(self).register(resource) + KeyChangesServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index b116c17669..7da79dc827 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -21,7 +21,7 @@ from twisted.web.resource import NoResource import synapse from synapse import events -from synapse.api.urls import FEDERATION_PREFIX +from synapse.api.urls import FEDERATION_PREFIX, SERVER_KEY_V2_PREFIX from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig @@ -44,6 +44,7 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.key.v2 import KeyApiV2Resource from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -99,6 +100,9 @@ class FederationReaderServer(HomeServer): ), }) + if name in ["keys", "federation"]: + resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index a461442fdc..9711a7147c 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -28,6 +28,7 @@ from synapse.config.logger import setup_logging from synapse.federation import send_queue from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore @@ -37,8 +38,10 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.replication.tcp.streams import ReceiptsStream from synapse.server import HomeServer from synapse.storage.engines import create_engine +from synapse.types import ReadReceipt from synapse.util.async_helpers import Linearizer from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, run_in_background @@ -202,6 +205,7 @@ class FederationSenderHandler(object): """ def __init__(self, hs, replication_client): self.store = hs.get_datastore() + self._is_mine_id = hs.is_mine_id self.federation_sender = hs.get_federation_sender() self.replication_client = replication_client @@ -234,6 +238,32 @@ class FederationSenderHandler(object): elif stream_name == "events": self.federation_sender.notify_new_events(token) + # ... and when new receipts happen + elif stream_name == ReceiptsStream.NAME: + run_as_background_process( + "process_receipts_for_federation", self._on_new_receipts, rows, + ) + + @defer.inlineCallbacks + def _on_new_receipts(self, rows): + """ + Args: + rows (iterable[synapse.replication.tcp.streams.ReceiptsStreamRow]): + new receipts to be processed + """ + for receipt in rows: + # we only want to send on receipts for our own users + if not self._is_mine_id(receipt.user_id): + continue + receipt_info = ReadReceipt( + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + [receipt.event_id], + receipt.data, + ) + yield self.federation_sender.send_read_receipt(receipt_info) + @defer.inlineCallbacks def update_token(self, token): try: diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e8b6cc3114..869c028d1f 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -376,6 +376,7 @@ def setup(config_options): logger.info("Database prepared in %s.", config.database_config['name']) hs.setup() + hs.setup_master() @defer.inlineCallbacks def do_acme(): @@ -636,17 +637,15 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, start_phone_stats_home) - if hs.config.daemonize and hs.config.print_pidfile: - print(hs.config.pid_file) - _base.start_reactor( "synapse-homeserver", - hs.config.soft_file_limit, - hs.config.gc_thresholds, - hs.config.pid_file, - hs.config.daemonize, - hs.config.cpu_affinity, - logger, + soft_file_limit=hs.config.soft_file_limit, + gc_thresholds=hs.config.gc_thresholds, + pid_file=hs.config.pid_file, + daemonize=hs.config.daemonize, + cpu_affinity=hs.config.cpu_affinity, + print_pidfile=hs.config.print_pidfile, + logger=logger, ) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 5aec43b702..f7d7f153bb 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -137,7 +137,7 @@ class Config(object): @staticmethod def read_config_file(file_path): with open(file_path) as file_stream: - return yaml.load(file_stream) + return yaml.safe_load(file_stream) def invoke_all(self, name, *args, **kargs): results = [] @@ -180,9 +180,7 @@ class Config(object): Returns: str: the yaml config file """ - default_config = "# vim:ft=yaml\n" - - default_config += "\n\n".join( + default_config = "\n\n".join( dedent(conf) for conf in self.invoke_all( "default_config", @@ -216,14 +214,20 @@ class Config(object): " Defaults to the directory containing the last config file", ) + obj = cls() + + obj.invoke_all("add_arguments", config_parser) + config_args = config_parser.parse_args(argv) config_files = find_config_files(search_paths=config_args.config_path) - obj = cls() obj.read_config_files( config_files, keys_directory=config_args.keys_directory, generate_keys=False ) + + obj.invoke_all("read_arguments", config_args) + return obj @classmethod @@ -297,19 +301,26 @@ class Config(object): "Must specify a server_name to a generate config for." " Pass -H server.name." ) + + config_str = obj.generate_config( + config_dir_path=config_dir_path, + data_dir_path=os.getcwd(), + server_name=server_name, + report_stats=(config_args.report_stats == "yes"), + generate_secrets=True, + ) + if not cls.path_exists(config_dir_path): os.makedirs(config_dir_path) with open(config_path, "w") as config_file: - config_str = obj.generate_config( - config_dir_path=config_dir_path, - data_dir_path=os.getcwd(), - server_name=server_name, - report_stats=(config_args.report_stats == "yes"), - generate_secrets=True, + config_file.write( + "# vim:ft=yaml\n\n" ) - config = yaml.load(config_str) - obj.invoke_all("generate_files", config) config_file.write(config_str) + + config = yaml.safe_load(config_str) + obj.invoke_all("generate_files", config) + print( ( "A config file has been generated in %r for server name" @@ -379,7 +390,7 @@ class Config(object): server_name=server_name, generate_secrets=False, ) - config = yaml.load(config_string) + config = yaml.safe_load(config_string) config.pop("log_config") config.update(specified_config) @@ -394,7 +405,10 @@ class Config(object): self.invoke_all("generate_files", config) return - self.invoke_all("read_config", config) + self.parse_config_dict(config) + + def parse_config_dict(self, config_dict): + self.invoke_all("read_config", config_dict) def find_config_files(search_paths): diff --git a/synapse/config/api.py b/synapse/config/api.py index e8a753f002..5eb4f86fa2 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -34,10 +34,10 @@ class ApiConfig(Config): # A list of event types that will be included in the room_invite_state # - room_invite_state_types: - - "{JoinRules}" - - "{CanonicalAlias}" - - "{RoomAvatar}" - - "{RoomEncryption}" - - "{Name}" + #room_invite_state_types: + # - "{JoinRules}" + # - "{CanonicalAlias}" + # - "{RoomAvatar}" + # - "{RoomEncryption}" + # - "{Name}" """.format(**vars(EventTypes)) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index c260d59464..7e89d345d8 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -37,14 +37,16 @@ class AppServiceConfig(Config): def default_config(cls, **kwargs): return """\ - # A list of application service config file to use + # A list of application service config files to use # - app_service_config_files: [] + #app_service_config_files: + # - app_service_1.yaml + # - app_service_2.yaml - # Whether or not to track application service IP addresses. Implicitly + # Uncomment to enable tracking of application service IP addresses. Implicitly # enables MAU tracking for application service users. # - track_appservice_user_ips: False + #track_appservice_user_ips: True """ @@ -66,7 +68,7 @@ def load_appservices(hostname, config_files): try: with open(config_file, 'r') as f: appservice = _load_appservice( - hostname, yaml.load(f), config_file + hostname, yaml.safe_load(f), config_file ) if appservice.id in seen_ids: raise ConfigError( diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index d25196be08..f7eebf26d2 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -18,11 +18,16 @@ from ._base import Config class CaptchaConfig(Config): def read_config(self, config): - self.recaptcha_private_key = config["recaptcha_private_key"] - self.recaptcha_public_key = config["recaptcha_public_key"] - self.enable_registration_captcha = config["enable_registration_captcha"] + self.recaptcha_private_key = config.get("recaptcha_private_key") + self.recaptcha_public_key = config.get("recaptcha_public_key") + self.enable_registration_captcha = config.get( + "enable_registration_captcha", False + ) self.captcha_bypass_secret = config.get("captcha_bypass_secret") - self.recaptcha_siteverify_api = config["recaptcha_siteverify_api"] + self.recaptcha_siteverify_api = config.get( + "recaptcha_siteverify_api", + "https://www.recaptcha.net/recaptcha/api/siteverify", + ) def default_config(self, **kwargs): return """\ @@ -31,21 +36,23 @@ class CaptchaConfig(Config): # This Home Server's ReCAPTCHA public key. # - recaptcha_public_key: "YOUR_PUBLIC_KEY" + #recaptcha_public_key: "YOUR_PUBLIC_KEY" # This Home Server's ReCAPTCHA private key. # - recaptcha_private_key: "YOUR_PRIVATE_KEY" + #recaptcha_private_key: "YOUR_PRIVATE_KEY" # Enables ReCaptcha checks when registering, preventing signup # unless a captcha is answered. Requires a valid ReCaptcha # public/private key. # - enable_registration_captcha: False + #enable_registration_captcha: false # A secret key used to bypass the captcha test entirely. + # #captcha_bypass_secret: "YOUR_SECRET_HERE" # The API endpoint to use for verifying m.login.recaptcha responses. - recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" + # + #recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" """ diff --git a/synapse/config/database.py b/synapse/config/database.py index c8890147a6..3c27ed6b4a 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -49,7 +49,8 @@ class DatabaseConfig(Config): def default_config(self, data_dir_path, **kwargs): database_path = os.path.join(data_dir_path, "homeserver.db") return """\ - # Database configuration + ## Database ## + database: # The database engine name name: "sqlite3" @@ -59,7 +60,8 @@ class DatabaseConfig(Config): database: "%(database_path)s" # Number of events to cache in memory. - event_cache_size: "10K" + # + #event_cache_size: 10K """ % locals() def read_arguments(self, args): diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 46933a904c..e4be172a79 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -23,9 +23,9 @@ class GroupsConfig(Config): def default_config(self, **kwargs): return """\ - # Whether to allow non server admins to create groups on this server + # Uncomment to allow non-server-admin users to create groups on this server # - enable_group_creation: false + #enable_group_creation: true # If enabled, non server admins can only create groups with local parts # starting with this prefix diff --git a/synapse/config/key.py b/synapse/config/key.py index 35f05fa974..933928885a 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -38,15 +38,26 @@ logger = logging.getLogger(__name__) class KeyConfig(Config): def read_config(self, config): - self.signing_key = self.read_signing_key(config["signing_key_path"]) + # the signing key can be specified inline or in a separate file + if "signing_key" in config: + self.signing_key = read_signing_keys([config["signing_key"]]) + else: + self.signing_key = self.read_signing_key(config["signing_key_path"]) + self.old_signing_keys = self.read_old_signing_keys( config.get("old_signing_keys", {}) ) self.key_refresh_interval = self.parse_duration( - config["key_refresh_interval"] + config.get("key_refresh_interval", "1d"), ) self.perspectives = self.read_perspectives( - config["perspectives"] + config.get("perspectives", {}).get("servers", { + "matrix.org": {"verify_keys": { + "ed25519:auto": { + "key": "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw", + } + }} + }) ) self.macaroon_secret_key = config.get( @@ -88,7 +99,7 @@ class KeyConfig(Config): # Used to enable access token expiration. # - expire_access_token: False + #expire_access_token: False # a secret which is used to calculate HMACs for form values, to stop # falsification of values. Must be specified for the User Consent @@ -117,21 +128,21 @@ class KeyConfig(Config): # Determines how quickly servers will query to check which keys # are still valid. # - key_refresh_interval: "1d" # 1 Day. + #key_refresh_interval: 1d # The trusted servers to download signing keys from. # - perspectives: - servers: - "matrix.org": - verify_keys: - "ed25519:auto": - key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" + #perspectives: + # servers: + # "matrix.org": + # verify_keys: + # "ed25519:auto": + # key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" """ % locals() - def read_perspectives(self, perspectives_config): + def read_perspectives(self, perspectives_servers): servers = {} - for server_name, server_config in perspectives_config["servers"].items(): + for server_name, server_config in perspectives_servers.items(): for key_id, key_data in server_config["verify_keys"].items(): if is_signing_algorithm_supported(key_id): key_base64 = key_data["key"] diff --git a/synapse/config/logger.py b/synapse/config/logger.py index f6940b65fd..c1febbe9d3 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -81,7 +81,9 @@ class LoggingConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): log_config = os.path.join(config_dir_path, server_name + ".log.config") - return """ + return """\ + ## Logging ## + # A yaml python logging config file # log_config: "%(log_config)s" @@ -193,7 +195,7 @@ def setup_logging(config, use_worker_options=False): else: def load_log_config(): with open(log_config, 'r') as f: - logging.config.dictConfig(yaml.load(f)) + logging.config.dictConfig(yaml.safe_load(f)) def sighup(*args): # it might be better to use a file watcher or something for this. diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index ed0498c634..2de51979d8 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -24,7 +24,7 @@ MISSING_SENTRY = ( class MetricsConfig(Config): def read_config(self, config): - self.enable_metrics = config["enable_metrics"] + self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") @@ -48,7 +48,7 @@ class MetricsConfig(Config): # Enable collection and rendering of performance metrics # - enable_metrics: False + #enable_metrics: False # Enable sentry integration # NOTE: While attempts are made to ensure that the logs don't contain diff --git a/synapse/config/password.py b/synapse/config/password.py index 2a52b9db54..eea59e772b 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -22,16 +22,21 @@ class PasswordConfig(Config): def read_config(self, config): password_config = config.get("password_config", {}) + if password_config is None: + password_config = {} + self.password_enabled = password_config.get("enabled", True) self.password_pepper = password_config.get("pepper", "") def default_config(self, config_dir_path, server_name, **kwargs): - return """ - # Enable password for login. - # + return """\ password_config: - enabled: true + # Uncomment to disable password login + # + #enabled: false + # Uncomment and change to a secret random string for extra security. # DO NOT CHANGE THIS AFTER INITIAL SETUP! - #pepper: "" + # + #pepper: "EVEN_MORE_SECRET" """ diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 54b71e6841..5a68399e63 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -15,17 +15,36 @@ from ._base import Config +class RateLimitConfig(object): + def __init__(self, config): + self.per_second = config.get("per_second", 0.17) + self.burst_count = config.get("burst_count", 3.0) + + class RatelimitConfig(Config): def read_config(self, config): - self.rc_messages_per_second = config["rc_messages_per_second"] - self.rc_message_burst_count = config["rc_message_burst_count"] + self.rc_messages_per_second = config.get("rc_messages_per_second", 0.2) + self.rc_message_burst_count = config.get("rc_message_burst_count", 10.0) + + self.rc_registration = RateLimitConfig(config.get("rc_registration", {})) + + rc_login_config = config.get("rc_login", {}) + self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) + self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) + self.rc_login_failed_attempts = RateLimitConfig( + rc_login_config.get("failed_attempts", {}), + ) - self.federation_rc_window_size = config["federation_rc_window_size"] - self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] - self.federation_rc_sleep_delay = config["federation_rc_sleep_delay"] - self.federation_rc_reject_limit = config["federation_rc_reject_limit"] - self.federation_rc_concurrent = config["federation_rc_concurrent"] + self.federation_rc_window_size = config.get("federation_rc_window_size", 1000) + self.federation_rc_sleep_limit = config.get("federation_rc_sleep_limit", 10) + self.federation_rc_sleep_delay = config.get("federation_rc_sleep_delay", 500) + self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50) + self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3) + + self.federation_rr_transactions_per_room_per_second = config.get( + "federation_rr_transactions_per_room_per_second", 50, + ) def default_config(self, **kwargs): return """\ @@ -33,33 +52,75 @@ class RatelimitConfig(Config): # Number of messages a client can send per second # - rc_messages_per_second: 0.2 + #rc_messages_per_second: 0.2 # Number of message a client can send before being throttled # - rc_message_burst_count: 10.0 + #rc_message_burst_count: 10.0 + + # Ratelimiting settings for registration and login. + # + # Each ratelimiting configuration is made of two parameters: + # - per_second: number of requests a client can send per second. + # - burst_count: number of requests a client can send before being throttled. + # + # Synapse currently uses the following configurations: + # - one for registration that ratelimits registration requests based on the + # client's IP address. + # - one for login that ratelimits login requests based on the client's IP + # address. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into, based on the amount of failed login + # attempts for this account. + # + # The defaults are as shown below. + # + #rc_registration: + # per_second: 0.17 + # burst_count: 3 + # + #rc_login: + # address: + # per_second: 0.17 + # burst_count: 3 + # account: + # per_second: 0.17 + # burst_count: 3 + # failed_attempts: + # per_second: 0.17 + # burst_count: 3 # The federation window size in milliseconds # - federation_rc_window_size: 1000 + #federation_rc_window_size: 1000 # The number of federation requests from a single server in a window # before the server will delay processing the request. # - federation_rc_sleep_limit: 10 + #federation_rc_sleep_limit: 10 # The duration in milliseconds to delay processing events from # remote servers by if they go over the sleep limit. # - federation_rc_sleep_delay: 500 + #federation_rc_sleep_delay: 500 # The maximum number of concurrent federation requests allowed # from a single server # - federation_rc_reject_limit: 50 + #federation_rc_reject_limit: 50 # The number of federation requests to concurrently process from a # single server # - federation_rc_concurrent: 3 + #federation_rc_concurrent: 3 + + # Target outgoing federation transaction frequency for sending read-receipts, + # per-room. + # + # If we end up trying to send out more read-receipts, they will get buffered up + # into fewer transactions. + # + #federation_rr_transactions_per_room_per_second: 50 """ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 2881482f96..f6b2b9ceee 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -24,7 +24,7 @@ class RegistrationConfig(Config): def read_config(self, config): self.enable_registration = bool( - strtobool(str(config["enable_registration"])) + strtobool(str(config.get("enable_registration", False))) ) if "disable_registration" in config: self.enable_registration = not bool( @@ -36,7 +36,10 @@ class RegistrationConfig(Config): self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) - self.trusted_third_party_id_servers = config["trusted_third_party_id_servers"] + self.trusted_third_party_id_servers = config.get( + "trusted_third_party_id_servers", + ["matrix.org", "vector.im"], + ) self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) @@ -64,9 +67,13 @@ class RegistrationConfig(Config): return """\ ## Registration ## + # + # Registration can be rate-limited using the parameters in the "Ratelimiting" + # section of this file. # Enable registration for new users. - enable_registration: False + # + #enable_registration: false # The user must provide all of the below types of 3PID when registering. # @@ -77,7 +84,7 @@ class RegistrationConfig(Config): # Explicitly disable asking for MSISDNs from the registration # flow (overrides registrations_require_3pid if MSISDNs are set as required) # - #disable_msisdn_registration: True + #disable_msisdn_registration: true # Mandate that users are only allowed to associate certain formats of # 3PIDs with accounts on this server. @@ -90,8 +97,8 @@ class RegistrationConfig(Config): # - medium: msisdn # pattern: '\\+44' - # If set, allows registration by anyone who also has the shared - # secret, even if registration is otherwise disabled. + # If set, allows registration of standard or admin accounts by anyone who + # has the shared secret, even if registration is otherwise disabled. # %(registration_shared_secret)s @@ -101,13 +108,13 @@ class RegistrationConfig(Config): # N.B. that increasing this will exponentially increase the time required # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. # - bcrypt_rounds: 12 + #bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and # participate in rooms hosted on this server which have been made # accessible to anonymous users. # - allow_guest_access: False + #allow_guest_access: false # The identity server which we suggest that clients should use when users log # in on this server. @@ -123,9 +130,9 @@ class RegistrationConfig(Config): # Also defines the ID server which will be called when an account is # deactivated (one will be picked arbitrarily). # - trusted_third_party_id_servers: - - matrix.org - - vector.im + #trusted_third_party_id_servers: + # - matrix.org + # - vector.im # Users who register on this homeserver will automatically be joined # to these rooms @@ -139,7 +146,7 @@ class RegistrationConfig(Config): # Setting to false means that if the rooms are not manually created, # users cannot be auto-joined since they do not exist. # - autocreate_auto_join_rooms: true + #autocreate_auto_join_rooms: true """ % locals() def add_arguments(self, parser): diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 97db2a5b7a..3f34ad9b2a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -19,6 +19,36 @@ from synapse.util.module_loader import load_module from ._base import Config, ConfigError +DEFAULT_THUMBNAIL_SIZES = [ + { + "width": 32, + "height": 32, + "method": "crop", + }, { + "width": 96, + "height": 96, + "method": "crop", + }, { + "width": 320, + "height": 240, + "method": "scale", + }, { + "width": 640, + "height": 480, + "method": "scale", + }, { + "width": 800, + "height": 600, + "method": "scale" + }, +] + +THUMBNAIL_SIZE_YAML = """\ + # - width: %(width)i + # height: %(height)i + # method: %(method)s +""" + MISSING_NETADDR = ( "Missing netaddr library. This is required for URL preview API." ) @@ -77,9 +107,9 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): def read_config(self, config): - self.max_upload_size = self.parse_size(config["max_upload_size"]) - self.max_image_pixels = self.parse_size(config["max_image_pixels"]) - self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M")) + self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M")) + self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M")) self.media_store_path = self.ensure_directory(config["media_store_path"]) @@ -139,9 +169,9 @@ class ContentRepositoryConfig(Config): ) self.uploads_path = self.ensure_directory(config["uploads_path"]) - self.dynamic_thumbnails = config["dynamic_thumbnails"] + self.dynamic_thumbnails = config.get("dynamic_thumbnails", False) self.thumbnail_requirements = parse_thumbnail_requirements( - config["thumbnail_sizes"] + config.get("thumbnail_sizes", DEFAULT_THUMBNAIL_SIZES), ) self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: @@ -178,6 +208,13 @@ class ContentRepositoryConfig(Config): def default_config(self, data_dir_path, **kwargs): media_store = os.path.join(data_dir_path, "media_store") uploads_path = os.path.join(data_dir_path, "uploads") + + formatted_thumbnail_sizes = "".join( + THUMBNAIL_SIZE_YAML % s for s in DEFAULT_THUMBNAIL_SIZES + ) + # strip final NL + formatted_thumbnail_sizes = formatted_thumbnail_sizes[:-1] + return r""" # Directory where uploaded images and attachments are stored. # @@ -204,11 +241,11 @@ class ContentRepositoryConfig(Config): # The largest allowed upload size in bytes # - max_upload_size: "10M" + #max_upload_size: 10M # Maximum number of pixels that will be thumbnailed # - max_image_pixels: "32M" + #max_image_pixels: 32M # Whether to generate new thumbnails on the fly to precisely match # the resolution requested by the client. If true then whenever @@ -216,32 +253,18 @@ class ContentRepositoryConfig(Config): # generate a new thumbnail. If false the server will pick a thumbnail # from a precalculated list. # - dynamic_thumbnails: false + #dynamic_thumbnails: false # List of thumbnails to precalculate when an image is uploaded. # - thumbnail_sizes: - - width: 32 - height: 32 - method: crop - - width: 96 - height: 96 - method: crop - - width: 320 - height: 240 - method: scale - - width: 640 - height: 480 - method: scale - - width: 800 - height: 600 - method: scale + #thumbnail_sizes: +%(formatted_thumbnail_sizes)s # Is the preview URL API enabled? If enabled, you *must* specify # an explicit url_preview_ip_range_blacklist of IPs that the spider is # denied from accessing. # - url_preview_enabled: False + #url_preview_enabled: false # List of IP address CIDR ranges that the URL preview spider is denied # from accessing. There are no defaults: you must explicitly @@ -306,6 +329,6 @@ class ContentRepositoryConfig(Config): # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' # The largest allowed URL preview spidering size in bytes - max_spider_size: "10M" - + # + #max_spider_size: 10M """ % locals() diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 9b897abe3c..8a9fded4c5 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -20,6 +20,10 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): def read_config(self, config): + self.enable_room_list_search = config.get( + "enable_room_list_search", True, + ) + alias_creation_rules = config.get("alias_creation_rules") if alias_creation_rules is not None: @@ -54,6 +58,12 @@ class RoomDirectoryConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ + # Uncomment to disable searching the public room list. When disabled + # blocks searching local and remote room lists for local and remote + # users by always returning an empty list for all queries. + # + #enable_room_list_search: false + # The `alias_creation` option controls who's allowed to create aliases # on this server. # diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index aff0a1f00c..39b9eb29c2 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -64,7 +64,7 @@ class SAML2Config(Config): } def default_config(self, config_dir_path, server_name, **kwargs): - return """ + return """\ # Enable SAML2 for registration and login. Uses pysaml2. # # `sp_config` is the configuration for the pysaml2 Service Provider. diff --git a/synapse/config/server.py b/synapse/config/server.py index 4200f10da3..08e4e45482 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -45,7 +45,7 @@ class ServerConfig(Config): self.pid_file = self.abspath(config.get("pid_file")) self.web_client_location = config.get("web_client_location", None) - self.soft_file_limit = config["soft_file_limit"] + self.soft_file_limit = config.get("soft_file_limit", 0) self.daemonize = config.get("daemonize") self.print_pidfile = config.get("print_pidfile") self.user_agent_suffix = config.get("user_agent_suffix") @@ -126,6 +126,11 @@ class ServerConfig(Config): self.public_baseurl += '/' self.start_pushers = config.get("start_pushers", True) + # (undocumented) option for torturing the worker-mode replication a bit, + # for testing. The value defines the number of milliseconds to pause before + # sending out any replication updates. + self.replication_torture_level = config.get("replication_torture_level") + self.listeners = [] for listener in config.get("listeners", []): if not isinstance(listener.get("port", None), int): @@ -260,9 +265,11 @@ class ServerConfig(Config): # This is used by remote servers to connect to this server, # e.g. matrix.org, localhost:8080, etc. # This is also the last part of your UserID. + # server_name: "%(server_name)s" # When running as a daemon, the file to store the pid in + # pid_file: %(pid_file)s # CPU affinity mask. Setting this restricts the CPUs on which the @@ -304,10 +311,12 @@ class ServerConfig(Config): # Set the soft limit on the number of file descriptors synapse can use # Zero is used to indicate synapse should set the soft limit to the # hard limit. - soft_file_limit: 0 + # + #soft_file_limit: 0 # Set to false to disable presence tracking on this homeserver. - use_presence: true + # + #use_presence: false # The GC threshold parameters to pass to `gc.set_threshold`, if defined # diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 40045de7ac..f0014902da 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -181,6 +181,11 @@ class TlsConfig(Config): # See 'ACME support' below to enable auto-provisioning this certificate via # Let's Encrypt. # + # If supplying your own, be sure to use a `.pem` file that includes the + # full certificate chain including any intermediate certificates (for + # instance, if using certbot, use `fullchain.pem` as your certificate, + # not `cert.pem`). + # #tls_certificate_path: "%(tls_certificate_path)s" # PEM-encoded private key for TLS diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index fab3a7d1c8..142754a7dc 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -22,9 +22,13 @@ class UserDirectoryConfig(Config): """ def read_config(self, config): + self.user_directory_search_enabled = True self.user_directory_search_all_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: + self.user_directory_search_enabled = ( + user_directory_config.get("enabled", True) + ) self.user_directory_search_all_users = ( user_directory_config.get("search_all_users", False) ) @@ -33,6 +37,10 @@ class UserDirectoryConfig(Config): return """ # User Directory configuration # + # 'enabled' defines whether users can search the user directory. If + # false then empty responses are returned to all queries. Defaults to + # true. + # # 'search_all_users' defines whether to search all users visible to your HS # when searching the user directory, rather than limiting to users visible # in public rooms. Defaults to false. If you set it True, you'll have to run @@ -40,5 +48,6 @@ class UserDirectoryConfig(Config): # on your database to tell it to rebuild the user_directory search indexes. # #user_directory: + # enabled: true # search_all_users: false """ diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 257f7c86e7..2a1f005a37 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -22,7 +22,9 @@ class VoipConfig(Config): self.turn_shared_secret = config.get("turn_shared_secret") self.turn_username = config.get("turn_username") self.turn_password = config.get("turn_password") - self.turn_user_lifetime = self.parse_duration(config["turn_user_lifetime"]) + self.turn_user_lifetime = self.parse_duration( + config.get("turn_user_lifetime", "1h"), + ) self.turn_allow_guests = config.get("turn_allow_guests", True) def default_config(self, **kwargs): @@ -45,7 +47,7 @@ class VoipConfig(Config): # How long generated TURN credentials last # - turn_user_lifetime: "1h" + #turn_user_lifetime: 1h # Whether guests should be allowed to use the TURN server. # This defaults to True, otherwise VoIP will be unreliable for guests. @@ -53,5 +55,5 @@ class VoipConfig(Config): # connect to arbitrary endpoints without having first signed up for a # valid account (e.g. by passing a CAPTCHA). # - turn_allow_guests: True + #turn_allow_guests: True """ diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 80baf0ce0e..bfbd8b6c91 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -28,7 +28,7 @@ class WorkerConfig(Config): if self.worker_app == "synapse.app.homeserver": self.worker_app = None - self.worker_listeners = config.get("worker_listeners") + self.worker_listeners = config.get("worker_listeners", []) self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") self.worker_log_file = config.get("worker_log_file") @@ -48,6 +48,17 @@ class WorkerConfig(Config): self.worker_main_http_uri = config.get("worker_main_http_uri", None) self.worker_cpu_affinity = config.get("worker_cpu_affinity") + # This option is really only here to support `--manhole` command line + # argument. + manhole = config.get("worker_manhole") + if manhole: + self.worker_listeners.append({ + "port": manhole, + "bind_addresses": ["127.0.0.1"], + "type": "manhole", + "tls": False, + }) + if self.worker_listeners: for listener in self.worker_listeners: bind_address = listener.pop("bind_address", None) @@ -57,3 +68,18 @@ class WorkerConfig(Config): bind_addresses.append(bind_address) elif not bind_addresses: bind_addresses.append('') + + def read_arguments(self, args): + # We support a bunch of command line arguments that override options in + # the config. A lot of these options have a worker_* prefix when running + # on workers so we also have to override them when command line options + # are specified. + + if args.daemonize is not None: + self.worker_daemonize = args.daemonize + if args.log_config is not None: + self.worker_log_config = args.log_config + if args.log_file is not None: + self.worker_log_file = args.log_file + if args.manhole is not None: + self.worker_manhole = args.worker_manhole diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 7474fd515f..0207cd989a 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -686,9 +686,9 @@ def _handle_key_deferred(verify_request): try: with PreserveLoggingContext(): _, key_id, verify_key = yield verify_request.deferred - except (IOError, RequestSendFailed) as e: + except KeyLookupError as e: logger.warn( - "Got IOError when downloading keys for %s: %s %s", + "Failed to download keys for %s: %s %s", server_name, type(e).__name__, str(e), ) raise SynapseError( diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 20c1ab4203..fafa135182 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -77,6 +77,20 @@ class _EventInternalMetadata(object): """ return getattr(self, "recheck_redaction", False) + def is_soft_failed(self): + """Whether the event has been soft failed. + + Soft failed events should be handled as usual, except: + 1. They should not go down sync or event streams, or generally + sent to clients. + 2. They should not be added to the forward extremities (and + therefore not to current state). + + Returns: + bool + """ + return getattr(self, "soft_failed", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. @@ -127,7 +141,6 @@ class EventBase(object): origin = _event_dict_property("origin") origin_server_ts = _event_dict_property("origin_server_ts") prev_events = _event_dict_property("prev_events") - prev_state = _event_dict_property("prev_state") redacts = _event_dict_property("redacts") room_id = _event_dict_property("room_id") sender = _event_dict_property("sender") diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 569eb277a9..81f3b4b1ff 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -886,6 +886,9 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry): def on_edu(self, edu_type, origin, content): """Overrides FederationHandlerRegistry """ + if not self.config.use_presence and edu_type == "m.presence": + return + handler = self.edu_handlers.get(edu_type) if handler: return super(ReplicationFederationHandlerRegistry, self).on_edu( diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 6f5995735a..04d04a4457 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -46,7 +46,7 @@ logger = logging.getLogger(__name__) class FederationRemoteSendQueue(object): - """A drop in replacement for TransactionQueue""" + """A drop in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname @@ -154,13 +154,17 @@ class FederationRemoteSendQueue(object): del self.device_messages[key] def notify_new_events(self, current_id): - """As per TransactionQueue""" + """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. pass - def send_edu(self, destination, edu_type, content, key=None): - """As per TransactionQueue""" + def build_and_send_edu(self, destination, edu_type, content, key=None): + """As per FederationSender""" + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + pos = self._next_pos() edu = Edu( @@ -179,8 +183,17 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() + def send_read_receipt(self, receipt): + """As per FederationSender + + Args: + receipt (synapse.types.ReadReceipt): + """ + # nothing to do here: the replication listener will handle it. + pass + def send_presence(self, states): - """As per TransactionQueue + """As per FederationSender Args: states (list(UserPresenceState)) @@ -197,7 +210,7 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() def send_device_messages(self, destination): - """As per TransactionQueue""" + """As per FederationSender""" pos = self._next_pos() self.device_messages[pos] = destination self.notifier.on_new_replication_data() @@ -435,7 +448,7 @@ def process_rows_for_federation(transaction_queue, rows): transaction queue ready for sending to the relevant homeservers. Args: - transaction_queue (TransactionQueue) + transaction_queue (FederationSender) rows (list(synapse.replication.tcp.streams.FederationStreamRow)) """ @@ -465,15 +478,11 @@ def process_rows_for_federation(transaction_queue, rows): for destination, edu_map in iteritems(buff.keyed_edus): for key, edu in edu_map.items(): - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=key, - ) + transaction_queue.send_edu(edu, key) for destination, edu_list in iteritems(buff.edus): for edu in edu_list: - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=None, - ) + transaction_queue.send_edu(edu, None) for destination in buff.device_destinations: transaction_queue.send_device_messages(destination) diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py new file mode 100644 index 0000000000..1dc041752b --- /dev/null +++ b/synapse/federation/sender/__init__.py @@ -0,0 +1,465 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from six import itervalues + +from prometheus_client import Counter + +from twisted.internet import defer + +import synapse.metrics +from synapse.federation.sender.per_destination_queue import PerDestinationQueue +from synapse.federation.sender.transaction_manager import TransactionManager +from synapse.federation.units import Edu +from synapse.handlers.presence import get_interested_remotes +from synapse.metrics import ( + LaterGauge, + event_processing_loop_counter, + event_processing_loop_room_count, + events_processed_counter, +) +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.util import logcontext +from synapse.util.metrics import measure_func + +logger = logging.getLogger(__name__) + +sent_pdus_destination_dist_count = Counter( + "synapse_federation_client_sent_pdu_destinations:count", + "Number of PDUs queued for sending to one or more destinations", +) + +sent_pdus_destination_dist_total = Counter( + "synapse_federation_client_sent_pdu_destinations:total", "" + "Total number of PDUs queued for sending across all destinations", +) + + +class FederationSender(object): + def __init__(self, hs): + self.hs = hs + self.server_name = hs.hostname + + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + + self.clock = hs.get_clock() + self.is_mine_id = hs.is_mine_id + + self._transaction_manager = TransactionManager(hs) + + # map from destination to PerDestinationQueue + self._per_destination_queues = {} # type: dict[str, PerDestinationQueue] + + LaterGauge( + "synapse_federation_transaction_queue_pending_destinations", + "", + [], + lambda: sum( + 1 for d in self._per_destination_queues.values() + if d.transmission_loop_running + ), + ) + + # Map of user_id -> UserPresenceState for all the pending presence + # to be sent out by user_id. Entries here get processed and put in + # pending_presence_by_dest + self.pending_presence = {} + + LaterGauge( + "synapse_federation_transaction_queue_pending_pdus", + "", + [], + lambda: sum( + d.pending_pdu_count() for d in self._per_destination_queues.values() + ), + ) + LaterGauge( + "synapse_federation_transaction_queue_pending_edus", + "", + [], + lambda: sum( + d.pending_edu_count() for d in self._per_destination_queues.values() + ), + ) + + self._order = 1 + + self._is_processing = False + self._last_poked_id = -1 + + self._processing_pending_presence = False + + # map from room_id to a set of PerDestinationQueues which we believe are + # awaiting a call to flush_read_receipts_for_room. The presence of an entry + # here for a given room means that we are rate-limiting RR flushes to that room, + # and that there is a pending call to _flush_rrs_for_room in the system. + self._queues_awaiting_rr_flush_by_room = { + } # type: dict[str, set[PerDestinationQueue]] + + self._rr_txn_interval_per_room_ms = ( + 1000.0 / hs.get_config().federation_rr_transactions_per_room_per_second + ) + + def _get_per_destination_queue(self, destination): + """Get or create a PerDestinationQueue for the given destination + + Args: + destination (str): server_name of remote server + + Returns: + PerDestinationQueue + """ + queue = self._per_destination_queues.get(destination) + if not queue: + queue = PerDestinationQueue(self.hs, self._transaction_manager, destination) + self._per_destination_queues[destination] = queue + return queue + + def notify_new_events(self, current_id): + """This gets called when we have some new events we might want to + send out to other servers. + """ + self._last_poked_id = max(current_id, self._last_poked_id) + + if self._is_processing: + return + + # fire off a processing loop in the background + run_as_background_process( + "process_event_queue_for_federation", + self._process_event_queue_loop, + ) + + @defer.inlineCallbacks + def _process_event_queue_loop(self): + try: + self._is_processing = True + while True: + last_token = yield self.store.get_federation_out_pos("events") + next_token, events = yield self.store.get_all_new_events_stream( + last_token, self._last_poked_id, limit=100, + ) + + logger.debug("Handling %s -> %s", last_token, next_token) + + if not events and next_token >= self._last_poked_id: + break + + @defer.inlineCallbacks + def handle_event(event): + # Only send events for this server. + send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() + is_mine = self.is_mine_id(event.sender) + if not is_mine and send_on_behalf_of is None: + return + + try: + # Get the state from before the event. + # We need to make sure that this is the state from before + # the event and not from after it. + # Otherwise if the last member on a server in a room is + # banned then it won't receive the event because it won't + # be in the room after the ban. + destinations = yield self.state.get_current_hosts_in_room( + event.room_id, latest_event_ids=event.prev_event_ids(), + ) + except Exception: + logger.exception( + "Failed to calculate hosts in room for event: %s", + event.event_id, + ) + return + + destinations = set(destinations) + + if send_on_behalf_of is not None: + # If we are sending the event on behalf of another server + # then it already has the event and there is no reason to + # send the event to it. + destinations.discard(send_on_behalf_of) + + logger.debug("Sending %s to %r", event, destinations) + + self._send_pdu(event, destinations) + + @defer.inlineCallbacks + def handle_room_events(events): + for event in events: + yield handle_event(event) + + events_by_room = {} + for event in events: + events_by_room.setdefault(event.room_id, []).append(event) + + yield logcontext.make_deferred_yieldable(defer.gatherResults( + [ + logcontext.run_in_background(handle_room_events, evs) + for evs in itervalues(events_by_room) + ], + consumeErrors=True + )) + + yield self.store.update_federation_out_pos( + "events", next_token + ) + + if events: + now = self.clock.time_msec() + ts = yield self.store.get_received_ts(events[-1].event_id) + + synapse.metrics.event_processing_lag.labels( + "federation_sender").set(now - ts) + synapse.metrics.event_processing_last_ts.labels( + "federation_sender").set(ts) + + events_processed_counter.inc(len(events)) + + event_processing_loop_room_count.labels( + "federation_sender" + ).inc(len(events_by_room)) + + event_processing_loop_counter.labels("federation_sender").inc() + + synapse.metrics.event_processing_positions.labels( + "federation_sender").set(next_token) + + finally: + self._is_processing = False + + def _send_pdu(self, pdu, destinations): + # We loop through all destinations to see whether we already have + # a transaction in progress. If we do, stick it in the pending_pdus + # table and we'll get back to it later. + + order = self._order + self._order += 1 + + destinations = set(destinations) + destinations.discard(self.server_name) + logger.debug("Sending to: %s", str(destinations)) + + if not destinations: + return + + sent_pdus_destination_dist_total.inc(len(destinations)) + sent_pdus_destination_dist_count.inc() + + for destination in destinations: + self._get_per_destination_queue(destination).send_pdu(pdu, order) + + @defer.inlineCallbacks + def send_read_receipt(self, receipt): + """Send a RR to any other servers in the room + + Args: + receipt (synapse.types.ReadReceipt): receipt to be sent + """ + + # Some background on the rate-limiting going on here. + # + # It turns out that if we attempt to send out RRs as soon as we get them from + # a client, then we end up trying to do several hundred Hz of federation + # transactions. (The number of transactions scales as O(N^2) on the size of a + # room, since in a large room we have both more RRs coming in, and more servers + # to send them to.) + # + # This leads to a lot of CPU load, and we end up getting behind. The solution + # currently adopted is as follows: + # + # The first receipt in a given room is sent out immediately, at time T0. Any + # further receipts are, in theory, batched up for N seconds, where N is calculated + # based on the number of servers in the room to achieve a transaction frequency + # of around 50Hz. So, for example, if there were 100 servers in the room, then + # N would be 100 / 50Hz = 2 seconds. + # + # Then, after T+N, we flush out any receipts that have accumulated, and restart + # the timer to flush out more receipts at T+2N, etc. If no receipts accumulate, + # we stop the cycle and go back to the start. + # + # However, in practice, it is often possible to flush out receipts earlier: in + # particular, if we are sending a transaction to a given server anyway (for + # example, because we have a PDU or a RR in another room to send), then we may + # as well send out all of the pending RRs for that server. So it may be that + # by the time we get to T+N, we don't actually have any RRs left to send out. + # Nevertheless we continue to buffer up RRs for the room in question until we + # reach the point that no RRs arrive between timer ticks. + # + # For even more background, see https://github.com/matrix-org/synapse/issues/4730. + + room_id = receipt.room_id + + # Work out which remote servers should be poked and poke them. + domains = yield self.state.get_current_hosts_in_room(room_id) + domains = [d for d in domains if d != self.server_name] + if not domains: + return + + queues_pending_flush = self._queues_awaiting_rr_flush_by_room.get( + room_id + ) + + # if there is no flush yet scheduled, we will send out these receipts with + # immediate flushes, and schedule the next flush for this room. + if queues_pending_flush is not None: + logger.debug("Queuing receipt for: %r", domains) + else: + logger.debug("Sending receipt to: %r", domains) + self._schedule_rr_flush_for_room(room_id, len(domains)) + + for domain in domains: + queue = self._get_per_destination_queue(domain) + queue.queue_read_receipt(receipt) + + # if there is already a RR flush pending for this room, then make sure this + # destination is registered for the flush + if queues_pending_flush is not None: + queues_pending_flush.add(queue) + else: + queue.flush_read_receipts_for_room(room_id) + + def _schedule_rr_flush_for_room(self, room_id, n_domains): + # that is going to cause approximately len(domains) transactions, so now back + # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM + backoff_ms = self._rr_txn_interval_per_room_ms * n_domains + + logger.debug("Scheduling RR flush in %s in %d ms", room_id, backoff_ms) + self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id) + self._queues_awaiting_rr_flush_by_room[room_id] = set() + + def _flush_rrs_for_room(self, room_id): + queues = self._queues_awaiting_rr_flush_by_room.pop(room_id) + logger.debug("Flushing RRs in %s to %s", room_id, queues) + + if not queues: + # no more RRs arrived for this room; we are done. + return + + # schedule the next flush + self._schedule_rr_flush_for_room(room_id, len(queues)) + + for queue in queues: + queue.flush_read_receipts_for_room(room_id) + + @logcontext.preserve_fn # the caller should not yield on this + @defer.inlineCallbacks + def send_presence(self, states): + """Send the new presence states to the appropriate destinations. + + This actually queues up the presence states ready for sending and + triggers a background task to process them and send out the transactions. + + Args: + states (list(UserPresenceState)) + """ + if not self.hs.config.use_presence: + # No-op if presence is disabled. + return + + # First we queue up the new presence by user ID, so multiple presence + # updates in quick successtion are correctly handled + # We only want to send presence for our own users, so lets always just + # filter here just in case. + self.pending_presence.update({ + state.user_id: state for state in states + if self.is_mine_id(state.user_id) + }) + + # We then handle the new pending presence in batches, first figuring + # out the destinations we need to send each state to and then poking it + # to attempt a new transaction. We linearize this so that we don't + # accidentally mess up the ordering and send multiple presence updates + # in the wrong order + if self._processing_pending_presence: + return + + self._processing_pending_presence = True + try: + while True: + states_map = self.pending_presence + self.pending_presence = {} + + if not states_map: + break + + yield self._process_presence_inner(list(states_map.values())) + except Exception: + logger.exception("Error sending presence states to servers") + finally: + self._processing_pending_presence = False + + @measure_func("txnqueue._process_presence") + @defer.inlineCallbacks + def _process_presence_inner(self, states): + """Given a list of states populate self.pending_presence_by_dest and + poke to send a new transaction to each destination + + Args: + states (list(UserPresenceState)) + """ + hosts_and_states = yield get_interested_remotes(self.store, states, self.state) + + for destinations, states in hosts_and_states: + for destination in destinations: + if destination == self.server_name: + continue + self._get_per_destination_queue(destination).send_presence(states) + + def build_and_send_edu(self, destination, edu_type, content, key=None): + """Construct an Edu object, and queue it for sending + + Args: + destination (str): name of server to send to + edu_type (str): type of EDU to send + content (dict): content of EDU + key (Any|None): clobbering key for this edu + """ + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + + edu = Edu( + origin=self.server_name, + destination=destination, + edu_type=edu_type, + content=content, + ) + + self.send_edu(edu, key) + + def send_edu(self, edu, key): + """Queue an EDU for sending + + Args: + edu (Edu): edu to send + key (Any|None): clobbering key for this edu + """ + queue = self._get_per_destination_queue(edu.destination) + if key: + queue.send_keyed_edu(edu, key) + else: + queue.send_edu(edu) + + def send_device_messages(self, destination): + if destination == self.server_name: + logger.info("Not sending device update to ourselves") + return + + self._get_per_destination_queue(destination).attempt_new_transaction() + + def get_current_token(self): + return 0 diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py new file mode 100644 index 0000000000..be99211003 --- /dev/null +++ b/synapse/federation/sender/per_destination_queue.py @@ -0,0 +1,378 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import datetime +import logging + +from prometheus_client import Counter + +from twisted.internet import defer + +from synapse.api.errors import ( + FederationDeniedError, + HttpResponseException, + RequestSendFailed, +) +from synapse.events import EventBase +from synapse.federation.units import Edu +from synapse.handlers.presence import format_user_presence_state +from synapse.metrics import sent_transactions_counter +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage import UserPresenceState +from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter + +logger = logging.getLogger(__name__) + + +sent_edus_counter = Counter( + "synapse_federation_client_sent_edus", + "Total number of EDUs successfully sent", +) + +sent_edus_by_type = Counter( + "synapse_federation_client_sent_edus_by_type", + "Number of sent EDUs successfully sent, by event type", + ["type"], +) + + +class PerDestinationQueue(object): + """ + Manages the per-destination transmission queues. + + Args: + hs (synapse.HomeServer): + transaction_sender (TransactionManager): + destination (str): the server_name of the destination that we are managing + transmission for. + """ + def __init__(self, hs, transaction_manager, destination): + self._server_name = hs.hostname + self._clock = hs.get_clock() + self._store = hs.get_datastore() + self._transaction_manager = transaction_manager + + self._destination = destination + self.transmission_loop_running = False + + # a list of tuples of (pending pdu, order) + self._pending_pdus = [] # type: list[tuple[EventBase, int]] + self._pending_edus = [] # type: list[Edu] + + # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered + # based on their key (e.g. typing events by room_id) + # Map of (edu_type, key) -> Edu + self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] + + # Map of user_id -> UserPresenceState of pending presence to be sent to this + # destination + self._pending_presence = {} # type: dict[str, UserPresenceState] + + # room_id -> receipt_type -> user_id -> receipt_dict + self._pending_rrs = {} + self._rrs_pending_flush = False + + # stream_id of last successfully sent to-device message. + # NB: may be a long or an int. + self._last_device_stream_id = 0 + + # stream_id of last successfully sent device list update. + self._last_device_list_stream_id = 0 + + def __str__(self): + return "PerDestinationQueue[%s]" % self._destination + + def pending_pdu_count(self): + return len(self._pending_pdus) + + def pending_edu_count(self): + return ( + len(self._pending_edus) + + len(self._pending_presence) + + len(self._pending_edus_keyed) + ) + + def send_pdu(self, pdu, order): + """Add a PDU to the queue, and start the transmission loop if neccessary + + Args: + pdu (EventBase): pdu to send + order (int): + """ + self._pending_pdus.append((pdu, order)) + self.attempt_new_transaction() + + def send_presence(self, states): + """Add presence updates to the queue. Start the transmission loop if neccessary. + + Args: + states (iterable[UserPresenceState]): presence to send + """ + self._pending_presence.update({ + state.user_id: state for state in states + }) + self.attempt_new_transaction() + + def queue_read_receipt(self, receipt): + """Add a RR to the list to be sent. Doesn't start the transmission loop yet + (see flush_read_receipts_for_room) + + Args: + receipt (synapse.api.receipt_info.ReceiptInfo): receipt to be queued + """ + self._pending_rrs.setdefault( + receipt.room_id, {}, + ).setdefault( + receipt.receipt_type, {} + )[receipt.user_id] = { + "event_ids": receipt.event_ids, + "data": receipt.data, + } + + def flush_read_receipts_for_room(self, room_id): + # if we don't have any read-receipts for this room, it may be that we've already + # sent them out, so we don't need to flush. + if room_id not in self._pending_rrs: + return + self._rrs_pending_flush = True + self.attempt_new_transaction() + + def send_keyed_edu(self, edu, key): + self._pending_edus_keyed[(edu.edu_type, key)] = edu + self.attempt_new_transaction() + + def send_edu(self, edu): + self._pending_edus.append(edu) + self.attempt_new_transaction() + + def attempt_new_transaction(self): + """Try to start a new transaction to this destination + + If there is already a transaction in progress to this destination, + returns immediately. Otherwise kicks off the process of sending a + transaction in the background. + """ + # list of (pending_pdu, deferred, order) + if self.transmission_loop_running: + # XXX: this can get stuck on by a never-ending + # request at which point pending_pdus just keeps growing. + # we need application-layer timeouts of some flavour of these + # requests + logger.debug( + "TX [%s] Transaction already in progress", + self._destination + ) + return + + logger.debug("TX [%s] Starting transaction loop", self._destination) + + run_as_background_process( + "federation_transaction_transmission_loop", + self._transaction_transmission_loop, + ) + + @defer.inlineCallbacks + def _transaction_transmission_loop(self): + pending_pdus = [] + try: + self.transmission_loop_running = True + + # This will throw if we wouldn't retry. We do this here so we fail + # quickly, but we will later check this again in the http client, + # hence why we throw the result away. + yield get_retry_limiter(self._destination, self._clock, self._store) + + pending_pdus = [] + while True: + device_message_edus, device_stream_id, dev_list_id = ( + yield self._get_new_device_messages() + ) + + # BEGIN CRITICAL SECTION + # + # In order to avoid a race condition, we need to make sure that + # the following code (from popping the queues up to the point + # where we decide if we actually have any pending messages) is + # atomic - otherwise new PDUs or EDUs might arrive in the + # meantime, but not get sent because we hold the + # transmission_loop_running flag. + + pending_pdus = self._pending_pdus + + # We can only include at most 50 PDUs per transactions + pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] + + pending_edus = [] + + pending_edus.extend(self._get_rr_edus(force_flush=False)) + + # We can only include at most 100 EDUs per transactions + pending_edus.extend(self._pop_pending_edus(100 - len(pending_edus))) + + pending_edus.extend( + self._pending_edus_keyed.values() + ) + + self._pending_edus_keyed = {} + + pending_edus.extend(device_message_edus) + + pending_presence = self._pending_presence + self._pending_presence = {} + if pending_presence: + pending_edus.append( + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.presence", + content={ + "push": [ + format_user_presence_state( + presence, self._clock.time_msec() + ) + for presence in pending_presence.values() + ] + }, + ) + ) + + if pending_pdus: + logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", + self._destination, len(pending_pdus)) + + if not pending_pdus and not pending_edus: + logger.debug("TX [%s] Nothing to send", self._destination) + self._last_device_stream_id = device_stream_id + return + + # if we've decided to send a transaction anyway, and we have room, we + # may as well send any pending RRs + if len(pending_edus) < 100: + pending_edus.extend(self._get_rr_edus(force_flush=True)) + + # END CRITICAL SECTION + + success = yield self._transaction_manager.send_new_transaction( + self._destination, pending_pdus, pending_edus + ) + if success: + sent_transactions_counter.inc() + sent_edus_counter.inc(len(pending_edus)) + for edu in pending_edus: + sent_edus_by_type.labels(edu.edu_type).inc() + # Remove the acknowledged device messages from the database + # Only bother if we actually sent some device messages + if device_message_edus: + yield self._store.delete_device_msgs_for_remote( + self._destination, device_stream_id + ) + logger.info( + "Marking as sent %r %r", self._destination, dev_list_id + ) + yield self._store.mark_as_sent_devices_by_remote( + self._destination, dev_list_id + ) + + self._last_device_stream_id = device_stream_id + self._last_device_list_stream_id = dev_list_id + else: + break + except NotRetryingDestination as e: + logger.debug( + "TX [%s] not ready for retry yet (next retry at %s) - " + "dropping transaction for now", + self._destination, + datetime.datetime.fromtimestamp( + (e.retry_last_ts + e.retry_interval) / 1000.0 + ), + ) + except FederationDeniedError as e: + logger.info(e) + except HttpResponseException as e: + logger.warning( + "TX [%s] Received %d response to transaction: %s", + self._destination, e.code, e, + ) + except RequestSendFailed as e: + logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) + + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + self._destination) + except Exception: + logger.exception( + "TX [%s] Failed to send transaction", + self._destination, + ) + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + self._destination) + finally: + # We want to be *very* sure we clear this after we stop processing + self.transmission_loop_running = False + + def _get_rr_edus(self, force_flush): + if not self._pending_rrs: + return + if not force_flush and not self._rrs_pending_flush: + # not yet time for this lot + return + + edu = Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.receipt", + content=self._pending_rrs, + ) + self._pending_rrs = {} + self._rrs_pending_flush = False + yield edu + + def _pop_pending_edus(self, limit): + pending_edus = self._pending_edus + pending_edus, self._pending_edus = pending_edus[:limit], pending_edus[limit:] + return pending_edus + + @defer.inlineCallbacks + def _get_new_device_messages(self): + last_device_stream_id = self._last_device_stream_id + to_device_stream_id = self._store.get_to_device_stream_token() + contents, stream_id = yield self._store.get_new_device_msgs_for_remote( + self._destination, last_device_stream_id, to_device_stream_id + ) + edus = [ + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.direct_to_device", + content=content, + ) + for content in contents + ] + + last_device_list = self._last_device_list_stream_id + now_stream_id, results = yield self._store.get_devices_by_remote( + self._destination, last_device_list + ) + edus.extend( + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.device_list_update", + content=content, + ) + for content in results + ) + defer.returnValue((edus, stream_id, now_stream_id)) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py new file mode 100644 index 0000000000..35e6b8ff5b --- /dev/null +++ b/synapse/federation/sender/transaction_manager.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.api.errors import HttpResponseException +from synapse.federation.persistence import TransactionActions +from synapse.federation.units import Transaction +from synapse.util.metrics import measure_func + +logger = logging.getLogger(__name__) + + +class TransactionManager(object): + """Helper class which handles building and sending transactions + + shared between PerDestinationQueue objects + """ + def __init__(self, hs): + self._server_name = hs.hostname + self.clock = hs.get_clock() # nb must be called this for @measure_func + self._store = hs.get_datastore() + self._transaction_actions = TransactionActions(self._store) + self._transport_layer = hs.get_federation_transport_client() + + # HACK to get unique tx id + self._next_txn_id = int(self.clock.time_msec()) + + @measure_func("_send_new_transaction") + @defer.inlineCallbacks + def send_new_transaction(self, destination, pending_pdus, pending_edus): + + # Sort based on the order field + pending_pdus.sort(key=lambda t: t[1]) + pdus = [x[0] for x in pending_pdus] + edus = pending_edus + + success = True + + logger.debug("TX [%s] _attempt_new_transaction", destination) + + txn_id = str(self._next_txn_id) + + logger.debug( + "TX [%s] {%s} Attempting new transaction" + " (pdus: %d, edus: %d)", + destination, txn_id, + len(pdus), + len(edus), + ) + + logger.debug("TX [%s] Persisting transaction...", destination) + + transaction = Transaction.create_new( + origin_server_ts=int(self.clock.time_msec()), + transaction_id=txn_id, + origin=self._server_name, + destination=destination, + pdus=pdus, + edus=edus, + ) + + self._next_txn_id += 1 + + yield self._transaction_actions.prepare_to_send(transaction) + + logger.debug("TX [%s] Persisted transaction", destination) + logger.info( + "TX [%s] {%s} Sending transaction [%s]," + " (PDUs: %d, EDUs: %d)", + destination, txn_id, + transaction.transaction_id, + len(pdus), + len(edus), + ) + + # Actually send the transaction + + # FIXME (erikj): This is a bit of a hack to make the Pdu age + # keys work + def json_data_cb(): + data = transaction.get_dict() + now = int(self.clock.time_msec()) + if "pdus" in data: + for p in data["pdus"]: + if "age_ts" in p: + unsigned = p.setdefault("unsigned", {}) + unsigned["age"] = now - int(p["age_ts"]) + del p["age_ts"] + return data + + try: + response = yield self._transport_layer.send_transaction( + transaction, json_data_cb + ) + code = 200 + except HttpResponseException as e: + code = e.code + response = e.response + + if e.code in (401, 404, 429) or 500 <= e.code: + logger.info( + "TX [%s] {%s} got %d response", + destination, txn_id, code + ) + raise e + + logger.info( + "TX [%s] {%s} got %d response", + destination, txn_id, code + ) + + yield self._transaction_actions.delivered( + transaction, code, response + ) + + logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) + + if code == 200: + for e_id, r in response.get("pdus", {}).items(): + if "error" in r: + logger.warn( + "TX [%s] {%s} Remote returned error for %s: %s", + destination, txn_id, e_id, r, + ) + else: + for p in pdus: + logger.warn( + "TX [%s] {%s} Failed to send event %s", + destination, txn_id, p.event_id, + ) + success = False + + defer.returnValue(success) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py deleted file mode 100644 index 30941f5ad6..0000000000 --- a/synapse/federation/transaction_queue.py +++ /dev/null @@ -1,699 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import datetime -import logging - -from six import itervalues - -from prometheus_client import Counter - -from twisted.internet import defer - -import synapse.metrics -from synapse.api.errors import ( - FederationDeniedError, - HttpResponseException, - RequestSendFailed, -) -from synapse.handlers.presence import format_user_presence_state, get_interested_remotes -from synapse.metrics import ( - LaterGauge, - event_processing_loop_counter, - event_processing_loop_room_count, - events_processed_counter, - sent_transactions_counter, -) -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.util import logcontext -from synapse.util.metrics import measure_func -from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter - -from .persistence import TransactionActions -from .units import Edu, Transaction - -logger = logging.getLogger(__name__) - -sent_pdus_destination_dist_count = Counter( - "synapse_federation_client_sent_pdu_destinations:count", - "Number of PDUs queued for sending to one or more destinations", -) - -sent_pdus_destination_dist_total = Counter( - "synapse_federation_client_sent_pdu_destinations:total", "" - "Total number of PDUs queued for sending across all destinations", -) - -sent_edus_counter = Counter( - "synapse_federation_client_sent_edus", - "Total number of EDUs successfully sent", -) - -sent_edus_by_type = Counter( - "synapse_federation_client_sent_edus_by_type", - "Number of sent EDUs successfully sent, by event type", - ["type"], -) - - -class TransactionQueue(object): - """This class makes sure we only have one transaction in flight at - a time for a given destination. - - It batches pending PDUs into single transactions. - """ - - def __init__(self, hs): - self.hs = hs - self.server_name = hs.hostname - - self.store = hs.get_datastore() - self.state = hs.get_state_handler() - self.transaction_actions = TransactionActions(self.store) - - self.transport_layer = hs.get_federation_transport_client() - - self.clock = hs.get_clock() - self.is_mine_id = hs.is_mine_id - - # Is a mapping from destinations -> deferreds. Used to keep track - # of which destinations have transactions in flight and when they are - # done - self.pending_transactions = {} - - LaterGauge( - "synapse_federation_transaction_queue_pending_destinations", - "", - [], - lambda: len(self.pending_transactions), - ) - - # Is a mapping from destination -> list of - # tuple(pending pdus, deferred, order) - self.pending_pdus_by_dest = pdus = {} - # destination -> list of tuple(edu, deferred) - self.pending_edus_by_dest = edus = {} - - # Map of user_id -> UserPresenceState for all the pending presence - # to be sent out by user_id. Entries here get processed and put in - # pending_presence_by_dest - self.pending_presence = {} - - # Map of destination -> user_id -> UserPresenceState of pending presence - # to be sent to each destinations - self.pending_presence_by_dest = presence = {} - - # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered - # based on their key (e.g. typing events by room_id) - # Map of destination -> (edu_type, key) -> Edu - self.pending_edus_keyed_by_dest = edus_keyed = {} - - LaterGauge( - "synapse_federation_transaction_queue_pending_pdus", - "", - [], - lambda: sum(map(len, pdus.values())), - ) - LaterGauge( - "synapse_federation_transaction_queue_pending_edus", - "", - [], - lambda: ( - sum(map(len, edus.values())) - + sum(map(len, presence.values())) - + sum(map(len, edus_keyed.values())) - ), - ) - - # destination -> stream_id of last successfully sent to-device message. - # NB: may be a long or an int. - self.last_device_stream_id_by_dest = {} - - # destination -> stream_id of last successfully sent device list - # update. - self.last_device_list_stream_id_by_dest = {} - - # HACK to get unique tx id - self._next_txn_id = int(self.clock.time_msec()) - - self._order = 1 - - self._is_processing = False - self._last_poked_id = -1 - - self._processing_pending_presence = False - - def notify_new_events(self, current_id): - """This gets called when we have some new events we might want to - send out to other servers. - """ - self._last_poked_id = max(current_id, self._last_poked_id) - - if self._is_processing: - return - - # fire off a processing loop in the background - run_as_background_process( - "process_event_queue_for_federation", - self._process_event_queue_loop, - ) - - @defer.inlineCallbacks - def _process_event_queue_loop(self): - try: - self._is_processing = True - while True: - last_token = yield self.store.get_federation_out_pos("events") - next_token, events = yield self.store.get_all_new_events_stream( - last_token, self._last_poked_id, limit=100, - ) - - logger.debug("Handling %s -> %s", last_token, next_token) - - if not events and next_token >= self._last_poked_id: - break - - @defer.inlineCallbacks - def handle_event(event): - # Only send events for this server. - send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() - is_mine = self.is_mine_id(event.sender) - if not is_mine and send_on_behalf_of is None: - return - - try: - # Get the state from before the event. - # We need to make sure that this is the state from before - # the event and not from after it. - # Otherwise if the last member on a server in a room is - # banned then it won't receive the event because it won't - # be in the room after the ban. - destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=event.prev_event_ids(), - ) - except Exception: - logger.exception( - "Failed to calculate hosts in room for event: %s", - event.event_id, - ) - return - - destinations = set(destinations) - - if send_on_behalf_of is not None: - # If we are sending the event on behalf of another server - # then it already has the event and there is no reason to - # send the event to it. - destinations.discard(send_on_behalf_of) - - logger.debug("Sending %s to %r", event, destinations) - - self._send_pdu(event, destinations) - - @defer.inlineCallbacks - def handle_room_events(events): - for event in events: - yield handle_event(event) - - events_by_room = {} - for event in events: - events_by_room.setdefault(event.room_id, []).append(event) - - yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.run_in_background(handle_room_events, evs) - for evs in itervalues(events_by_room) - ], - consumeErrors=True - )) - - yield self.store.update_federation_out_pos( - "events", next_token - ) - - if events: - now = self.clock.time_msec() - ts = yield self.store.get_received_ts(events[-1].event_id) - - synapse.metrics.event_processing_lag.labels( - "federation_sender").set(now - ts) - synapse.metrics.event_processing_last_ts.labels( - "federation_sender").set(ts) - - events_processed_counter.inc(len(events)) - - event_processing_loop_room_count.labels( - "federation_sender" - ).inc(len(events_by_room)) - - event_processing_loop_counter.labels("federation_sender").inc() - - synapse.metrics.event_processing_positions.labels( - "federation_sender").set(next_token) - - finally: - self._is_processing = False - - def _send_pdu(self, pdu, destinations): - # We loop through all destinations to see whether we already have - # a transaction in progress. If we do, stick it in the pending_pdus - # table and we'll get back to it later. - - order = self._order - self._order += 1 - - destinations = set(destinations) - destinations.discard(self.server_name) - logger.debug("Sending to: %s", str(destinations)) - - if not destinations: - return - - sent_pdus_destination_dist_total.inc(len(destinations)) - sent_pdus_destination_dist_count.inc() - - for destination in destinations: - self.pending_pdus_by_dest.setdefault(destination, []).append( - (pdu, order) - ) - - self._attempt_new_transaction(destination) - - @logcontext.preserve_fn # the caller should not yield on this - @defer.inlineCallbacks - def send_presence(self, states): - """Send the new presence states to the appropriate destinations. - - This actually queues up the presence states ready for sending and - triggers a background task to process them and send out the transactions. - - Args: - states (list(UserPresenceState)) - """ - if not self.hs.config.use_presence: - # No-op if presence is disabled. - return - - # First we queue up the new presence by user ID, so multiple presence - # updates in quick successtion are correctly handled - # We only want to send presence for our own users, so lets always just - # filter here just in case. - self.pending_presence.update({ - state.user_id: state for state in states - if self.is_mine_id(state.user_id) - }) - - # We then handle the new pending presence in batches, first figuring - # out the destinations we need to send each state to and then poking it - # to attempt a new transaction. We linearize this so that we don't - # accidentally mess up the ordering and send multiple presence updates - # in the wrong order - if self._processing_pending_presence: - return - - self._processing_pending_presence = True - try: - while True: - states_map = self.pending_presence - self.pending_presence = {} - - if not states_map: - break - - yield self._process_presence_inner(list(states_map.values())) - except Exception: - logger.exception("Error sending presence states to servers") - finally: - self._processing_pending_presence = False - - @measure_func("txnqueue._process_presence") - @defer.inlineCallbacks - def _process_presence_inner(self, states): - """Given a list of states populate self.pending_presence_by_dest and - poke to send a new transaction to each destination - - Args: - states (list(UserPresenceState)) - """ - hosts_and_states = yield get_interested_remotes(self.store, states, self.state) - - for destinations, states in hosts_and_states: - for destination in destinations: - if destination == self.server_name: - continue - - self.pending_presence_by_dest.setdefault( - destination, {} - ).update({ - state.user_id: state for state in states - }) - - self._attempt_new_transaction(destination) - - def send_edu(self, destination, edu_type, content, key=None): - edu = Edu( - origin=self.server_name, - destination=destination, - edu_type=edu_type, - content=content, - ) - - if destination == self.server_name: - logger.info("Not sending EDU to ourselves") - return - - if key: - self.pending_edus_keyed_by_dest.setdefault( - destination, {} - )[(edu.edu_type, key)] = edu - else: - self.pending_edus_by_dest.setdefault(destination, []).append(edu) - - self._attempt_new_transaction(destination) - - def send_device_messages(self, destination): - if destination == self.server_name: - logger.info("Not sending device update to ourselves") - return - - self._attempt_new_transaction(destination) - - def get_current_token(self): - return 0 - - def _attempt_new_transaction(self, destination): - """Try to start a new transaction to this destination - - If there is already a transaction in progress to this destination, - returns immediately. Otherwise kicks off the process of sending a - transaction in the background. - - Args: - destination (str): - - Returns: - None - """ - # list of (pending_pdu, deferred, order) - if destination in self.pending_transactions: - # XXX: pending_transactions can get stuck on by a never-ending - # request at which point pending_pdus_by_dest just keeps growing. - # we need application-layer timeouts of some flavour of these - # requests - logger.debug( - "TX [%s] Transaction already in progress", - destination - ) - return - - logger.debug("TX [%s] Starting transaction loop", destination) - - run_as_background_process( - "federation_transaction_transmission_loop", - self._transaction_transmission_loop, - destination, - ) - - @defer.inlineCallbacks - def _transaction_transmission_loop(self, destination): - pending_pdus = [] - try: - self.pending_transactions[destination] = 1 - - # This will throw if we wouldn't retry. We do this here so we fail - # quickly, but we will later check this again in the http client, - # hence why we throw the result away. - yield get_retry_limiter(destination, self.clock, self.store) - - pending_pdus = [] - while True: - device_message_edus, device_stream_id, dev_list_id = ( - yield self._get_new_device_messages(destination) - ) - - # BEGIN CRITICAL SECTION - # - # In order to avoid a race condition, we need to make sure that - # the following code (from popping the queues up to the point - # where we decide if we actually have any pending messages) is - # atomic - otherwise new PDUs or EDUs might arrive in the - # meantime, but not get sent because we hold the - # pending_transactions flag. - - pending_pdus = self.pending_pdus_by_dest.pop(destination, []) - - # We can only include at most 50 PDUs per transactions - pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:] - if leftover_pdus: - self.pending_pdus_by_dest[destination] = leftover_pdus - - pending_edus = self.pending_edus_by_dest.pop(destination, []) - - # We can only include at most 100 EDUs per transactions - pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:] - if leftover_edus: - self.pending_edus_by_dest[destination] = leftover_edus - - pending_presence = self.pending_presence_by_dest.pop(destination, {}) - - pending_edus.extend( - self.pending_edus_keyed_by_dest.pop(destination, {}).values() - ) - - pending_edus.extend(device_message_edus) - if pending_presence: - pending_edus.append( - Edu( - origin=self.server_name, - destination=destination, - edu_type="m.presence", - content={ - "push": [ - format_user_presence_state( - presence, self.clock.time_msec() - ) - for presence in pending_presence.values() - ] - }, - ) - ) - - if pending_pdus: - logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", - destination, len(pending_pdus)) - - if not pending_pdus and not pending_edus: - logger.debug("TX [%s] Nothing to send", destination) - self.last_device_stream_id_by_dest[destination] = ( - device_stream_id - ) - return - - # END CRITICAL SECTION - - success = yield self._send_new_transaction( - destination, pending_pdus, pending_edus, - ) - if success: - sent_transactions_counter.inc() - sent_edus_counter.inc(len(pending_edus)) - for edu in pending_edus: - sent_edus_by_type.labels(edu.edu_type).inc() - # Remove the acknowledged device messages from the database - # Only bother if we actually sent some device messages - if device_message_edus: - yield self.store.delete_device_msgs_for_remote( - destination, device_stream_id - ) - logger.info("Marking as sent %r %r", destination, dev_list_id) - yield self.store.mark_as_sent_devices_by_remote( - destination, dev_list_id - ) - - self.last_device_stream_id_by_dest[destination] = device_stream_id - self.last_device_list_stream_id_by_dest[destination] = dev_list_id - else: - break - except NotRetryingDestination as e: - logger.debug( - "TX [%s] not ready for retry yet (next retry at %s) - " - "dropping transaction for now", - destination, - datetime.datetime.fromtimestamp( - (e.retry_last_ts + e.retry_interval) / 1000.0 - ), - ) - except FederationDeniedError as e: - logger.info(e) - except HttpResponseException as e: - logger.warning( - "TX [%s] Received %d response to transaction: %s", - destination, e.code, e, - ) - except RequestSendFailed as e: - logger.warning("TX [%s] Failed to send transaction: %s", destination, e) - - for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - destination) - except Exception: - logger.exception( - "TX [%s] Failed to send transaction", - destination, - ) - for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - destination) - finally: - # We want to be *very* sure we delete this after we stop processing - self.pending_transactions.pop(destination, None) - - @defer.inlineCallbacks - def _get_new_device_messages(self, destination): - last_device_stream_id = self.last_device_stream_id_by_dest.get(destination, 0) - to_device_stream_id = self.store.get_to_device_stream_token() - contents, stream_id = yield self.store.get_new_device_msgs_for_remote( - destination, last_device_stream_id, to_device_stream_id - ) - edus = [ - Edu( - origin=self.server_name, - destination=destination, - edu_type="m.direct_to_device", - content=content, - ) - for content in contents - ] - - last_device_list = self.last_device_list_stream_id_by_dest.get(destination, 0) - now_stream_id, results = yield self.store.get_devices_by_remote( - destination, last_device_list - ) - edus.extend( - Edu( - origin=self.server_name, - destination=destination, - edu_type="m.device_list_update", - content=content, - ) - for content in results - ) - defer.returnValue((edus, stream_id, now_stream_id)) - - @measure_func("_send_new_transaction") - @defer.inlineCallbacks - def _send_new_transaction(self, destination, pending_pdus, pending_edus): - - # Sort based on the order field - pending_pdus.sort(key=lambda t: t[1]) - pdus = [x[0] for x in pending_pdus] - edus = pending_edus - - success = True - - logger.debug("TX [%s] _attempt_new_transaction", destination) - - txn_id = str(self._next_txn_id) - - logger.debug( - "TX [%s] {%s} Attempting new transaction" - " (pdus: %d, edus: %d)", - destination, txn_id, - len(pdus), - len(edus), - ) - - logger.debug("TX [%s] Persisting transaction...", destination) - - transaction = Transaction.create_new( - origin_server_ts=int(self.clock.time_msec()), - transaction_id=txn_id, - origin=self.server_name, - destination=destination, - pdus=pdus, - edus=edus, - ) - - self._next_txn_id += 1 - - yield self.transaction_actions.prepare_to_send(transaction) - - logger.debug("TX [%s] Persisted transaction", destination) - logger.info( - "TX [%s] {%s} Sending transaction [%s]," - " (PDUs: %d, EDUs: %d)", - destination, txn_id, - transaction.transaction_id, - len(pdus), - len(edus), - ) - - # Actually send the transaction - - # FIXME (erikj): This is a bit of a hack to make the Pdu age - # keys work - def json_data_cb(): - data = transaction.get_dict() - now = int(self.clock.time_msec()) - if "pdus" in data: - for p in data["pdus"]: - if "age_ts" in p: - unsigned = p.setdefault("unsigned", {}) - unsigned["age"] = now - int(p["age_ts"]) - del p["age_ts"] - return data - - try: - response = yield self.transport_layer.send_transaction( - transaction, json_data_cb - ) - code = 200 - except HttpResponseException as e: - code = e.code - response = e.response - - if e.code in (401, 404, 429) or 500 <= e.code: - logger.info( - "TX [%s] {%s} got %d response", - destination, txn_id, code - ) - raise e - - logger.info( - "TX [%s] {%s} got %d response", - destination, txn_id, code - ) - - yield self.transaction_actions.delivered( - transaction, code, response - ) - - logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) - - if code == 200: - for e_id, r in response.get("pdus", {}).items(): - if "error" in r: - logger.warn( - "TX [%s] {%s} Remote returned error for %s: %s", - destination, txn_id, e_id, r, - ) - else: - for p in pdus: - logger.warn( - "TX [%s] {%s} Failed to send event %s", - destination, txn_id, p.event_id, - ) - success = False - - defer.returnValue(success) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 8e2be218e2..e424c40fdf 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -51,9 +51,10 @@ class TransportLayerClient(object): logger.debug("get_room_state dest=%s, room=%s", destination, room_id) - path = _create_v1_path("/state/%s/", room_id) + path = _create_v1_path("/state/%s", room_id) return self.client.get_json( destination, path=path, args={"event_id": event_id}, + try_trailing_slash_on_400=True, ) @log_function @@ -73,9 +74,10 @@ class TransportLayerClient(object): logger.debug("get_room_state_ids dest=%s, room=%s", destination, room_id) - path = _create_v1_path("/state_ids/%s/", room_id) + path = _create_v1_path("/state_ids/%s", room_id) return self.client.get_json( destination, path=path, args={"event_id": event_id}, + try_trailing_slash_on_400=True, ) @log_function @@ -95,8 +97,11 @@ class TransportLayerClient(object): logger.debug("get_pdu dest=%s, event_id=%s", destination, event_id) - path = _create_v1_path("/event/%s/", event_id) - return self.client.get_json(destination, path=path, timeout=timeout) + path = _create_v1_path("/event/%s", event_id) + return self.client.get_json( + destination, path=path, timeout=timeout, + try_trailing_slash_on_400=True, + ) @log_function def backfill(self, destination, room_id, event_tuples, limit): @@ -121,7 +126,7 @@ class TransportLayerClient(object): # TODO: raise? return - path = _create_v1_path("/backfill/%s/", room_id) + path = _create_v1_path("/backfill/%s", room_id) args = { "v": event_tuples, @@ -132,6 +137,7 @@ class TransportLayerClient(object): destination, path=path, args=args, + try_trailing_slash_on_400=True, ) @defer.inlineCallbacks @@ -167,7 +173,7 @@ class TransportLayerClient(object): # generated by the json_data_callback. json_data = transaction.get_dict() - path = _create_v1_path("/send/%s/", transaction.transaction_id) + path = _create_v1_path("/send/%s", transaction.transaction_id) response = yield self.client.put_json( transaction.destination, @@ -176,6 +182,7 @@ class TransportLayerClient(object): json_data_callback=json_data_callback, long_retries=True, backoff_on_404=True, # If we get a 404 the other side has gone + try_trailing_slash_on_400=True, ) defer.returnValue(response) @@ -959,7 +966,7 @@ def _create_v1_path(path, *args): Example: - _create_v1_path("/event/%s/", event_id) + _create_v1_path("/event/%s", event_id) Args: path (str): String template for the path @@ -980,7 +987,7 @@ def _create_v2_path(path, *args): Example: - _create_v2_path("/event/%s/", event_id) + _create_v2_path("/event/%s", event_id) Args: path (str): String template for the path diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 5ba94be2ec..efb6bdca48 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -312,7 +312,7 @@ class BaseFederationServlet(object): class FederationSendServlet(BaseFederationServlet): - PATH = "/send/(?P<transaction_id>[^/]*)/" + PATH = "/send/(?P<transaction_id>[^/]*)/?" def __init__(self, handler, server_name, **kwargs): super(FederationSendServlet, self).__init__( @@ -378,7 +378,7 @@ class FederationSendServlet(BaseFederationServlet): class FederationEventServlet(BaseFederationServlet): - PATH = "/event/(?P<event_id>[^/]*)/" + PATH = "/event/(?P<event_id>[^/]*)/?" # This is when someone asks for a data item for a given server data_id pair. def on_GET(self, origin, content, query, event_id): @@ -386,30 +386,30 @@ class FederationEventServlet(BaseFederationServlet): class FederationStateServlet(BaseFederationServlet): - PATH = "/state/(?P<context>[^/]*)/" + PATH = "/state/(?P<context>[^/]*)/?" # This is when someone asks for all data for a given context. def on_GET(self, origin, content, query, context): return self.handler.on_context_state_request( origin, context, - parse_string_from_args(query, "event_id", None), + parse_string_from_args(query, "event_id", None, required=True), ) class FederationStateIdsServlet(BaseFederationServlet): - PATH = "/state_ids/(?P<room_id>[^/]*)/" + PATH = "/state_ids/(?P<room_id>[^/]*)/?" def on_GET(self, origin, content, query, room_id): return self.handler.on_state_ids_request( origin, room_id, - parse_string_from_args(query, "event_id", None), + parse_string_from_args(query, "event_id", None, required=True), ) class FederationBackfillServlet(BaseFederationServlet): - PATH = "/backfill/(?P<context>[^/]*)/" + PATH = "/backfill/(?P<context>[^/]*)/?" def on_GET(self, origin, content, query, context): versions = [x.decode('ascii') for x in query[b"v"]] @@ -759,7 +759,7 @@ class FederationVersionServlet(BaseFederationServlet): class FederationGroupsProfileServlet(BaseFederationServlet): """Get/set the basic profile of a group on behalf of a user """ - PATH = "/groups/(?P<group_id>[^/]*)/profile$" + PATH = "/groups/(?P<group_id>[^/]*)/profile" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -787,7 +787,7 @@ class FederationGroupsProfileServlet(BaseFederationServlet): class FederationGroupsSummaryServlet(BaseFederationServlet): - PATH = "/groups/(?P<group_id>[^/]*)/summary$" + PATH = "/groups/(?P<group_id>[^/]*)/summary" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -805,7 +805,7 @@ class FederationGroupsSummaryServlet(BaseFederationServlet): class FederationGroupsRoomsServlet(BaseFederationServlet): """Get the rooms in a group on behalf of a user """ - PATH = "/groups/(?P<group_id>[^/]*)/rooms$" + PATH = "/groups/(?P<group_id>[^/]*)/rooms" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -823,7 +823,7 @@ class FederationGroupsRoomsServlet(BaseFederationServlet): class FederationGroupsAddRoomsServlet(BaseFederationServlet): """Add/remove room from group """ - PATH = "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)$" + PATH = "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, room_id): @@ -855,7 +855,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): """ PATH = ( "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)" - "/config/(?P<config_key>[^/]*)$" + "/config/(?P<config_key>[^/]*)" ) @defer.inlineCallbacks @@ -874,7 +874,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): class FederationGroupsUsersServlet(BaseFederationServlet): """Get the users in a group on behalf of a user """ - PATH = "/groups/(?P<group_id>[^/]*)/users$" + PATH = "/groups/(?P<group_id>[^/]*)/users" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -892,7 +892,7 @@ class FederationGroupsUsersServlet(BaseFederationServlet): class FederationGroupsInvitedUsersServlet(BaseFederationServlet): """Get the users that have been invited to a group """ - PATH = "/groups/(?P<group_id>[^/]*)/invited_users$" + PATH = "/groups/(?P<group_id>[^/]*)/invited_users" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -910,7 +910,7 @@ class FederationGroupsInvitedUsersServlet(BaseFederationServlet): class FederationGroupsInviteServlet(BaseFederationServlet): """Ask a group server to invite someone to the group """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -928,7 +928,7 @@ class FederationGroupsInviteServlet(BaseFederationServlet): class FederationGroupsAcceptInviteServlet(BaseFederationServlet): """Accept an invitation from the group server """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/accept_invite$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/accept_invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -945,7 +945,7 @@ class FederationGroupsAcceptInviteServlet(BaseFederationServlet): class FederationGroupsJoinServlet(BaseFederationServlet): """Attempt to join a group """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/join$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/join" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -962,7 +962,7 @@ class FederationGroupsJoinServlet(BaseFederationServlet): class FederationGroupsRemoveUserServlet(BaseFederationServlet): """Leave or kick a user from the group """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -980,7 +980,7 @@ class FederationGroupsRemoveUserServlet(BaseFederationServlet): class FederationGroupsLocalInviteServlet(BaseFederationServlet): """A group server has invited a local user """ - PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite$" + PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -997,7 +997,7 @@ class FederationGroupsLocalInviteServlet(BaseFederationServlet): class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): """A group server has removed a local user """ - PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove$" + PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -1014,7 +1014,7 @@ class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): class FederationGroupsRenewAttestaionServlet(BaseFederationServlet): """A group or user's server renews their attestation """ - PATH = "/groups/(?P<group_id>[^/]*)/renew_attestation/(?P<user_id>[^/]*)$" + PATH = "/groups/(?P<group_id>[^/]*)/renew_attestation/(?P<user_id>[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -1037,7 +1037,7 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): PATH = ( "/groups/(?P<group_id>[^/]*)/summary" "(/categories/(?P<category_id>[^/]+))?" - "/rooms/(?P<room_id>[^/]*)$" + "/rooms/(?P<room_id>[^/]*)" ) @defer.inlineCallbacks @@ -1080,7 +1080,7 @@ class FederationGroupsCategoriesServlet(BaseFederationServlet): """Get all categories for a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/categories/$" + "/groups/(?P<group_id>[^/]*)/categories/?" ) @defer.inlineCallbacks @@ -1100,7 +1100,7 @@ class FederationGroupsCategoryServlet(BaseFederationServlet): """Add/remove/get a category in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/categories/(?P<category_id>[^/]+)$" + "/groups/(?P<group_id>[^/]*)/categories/(?P<category_id>[^/]+)" ) @defer.inlineCallbacks @@ -1150,7 +1150,7 @@ class FederationGroupsRolesServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/roles/$" + "/groups/(?P<group_id>[^/]*)/roles/?" ) @defer.inlineCallbacks @@ -1170,7 +1170,7 @@ class FederationGroupsRoleServlet(BaseFederationServlet): """Add/remove/get a role in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/roles/(?P<role_id>[^/]+)$" + "/groups/(?P<group_id>[^/]*)/roles/(?P<role_id>[^/]+)" ) @defer.inlineCallbacks @@ -1226,7 +1226,7 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): PATH = ( "/groups/(?P<group_id>[^/]*)/summary" "(/roles/(?P<role_id>[^/]+))?" - "/users/(?P<user_id>[^/]*)$" + "/users/(?P<user_id>[^/]*)" ) @defer.inlineCallbacks @@ -1269,7 +1269,7 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/get_groups_publicised$" + "/get_groups_publicised" ) @defer.inlineCallbacks @@ -1284,7 +1284,7 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): """Sets whether a group is joinable without an invite or knock """ - PATH = "/groups/(?P<group_id>[^/]*)/settings/m.join_policy$" + PATH = "/groups/(?P<group_id>[^/]*)/settings/m.join_policy" @defer.inlineCallbacks def on_PUT(self, origin, content, query, group_id): diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 594754cfd8..ac09d03ba9 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -93,9 +93,9 @@ class BaseHandler(object): messages_per_second = self.hs.config.rc_messages_per_second burst_count = self.hs.config.rc_message_burst_count - allowed, time_allowed = self.ratelimiter.send_message( + allowed, time_allowed = self.ratelimiter.can_do_action( user_id, time_now, - msg_rate_hz=messages_per_second, + rate_hz=messages_per_second, burst_count=burst_count, update=update, ) @@ -165,6 +165,7 @@ class BaseHandler(object): member_event.room_id, "leave", ratelimit=False, + require_consent=False, ) except Exception as e: logger.exception("Error kicking guest user: %s" % (e,)) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 2abd9af94f..4544de821d 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.module_api import ModuleApi from synapse.types import UserID from synapse.util import logcontext @@ -99,6 +100,11 @@ class AuthHandler(BaseHandler): login_types.append(t) self._supported_login_types = login_types + self._account_ratelimiter = Ratelimiter() + self._failed_attempts_ratelimiter = Ratelimiter() + + self._clock = self.hs.get_clock() + @defer.inlineCallbacks def validate_user_via_ui_auth(self, requester, request_body, clientip): """ @@ -568,7 +574,12 @@ class AuthHandler(BaseHandler): Returns: defer.Deferred: (unicode) canonical_user_id, or None if zero or multiple matches + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ + self.ratelimit_login_per_account(user_id) res = yield self._find_user_id_and_pwd_hash(user_id) if res is not None: defer.returnValue(res[0]) @@ -634,6 +645,8 @@ class AuthHandler(BaseHandler): StoreError if there was a problem accessing the database SynapseError if there was a problem with the request LoginError if there was an authentication problem. + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ if username.startswith('@'): @@ -643,6 +656,8 @@ class AuthHandler(BaseHandler): username, self.hs.hostname ).to_string() + self.ratelimit_login_per_account(qualified_user_id) + login_type = login_submission.get("type") known_login_type = False @@ -715,15 +730,58 @@ class AuthHandler(BaseHandler): if not known_login_type: raise SynapseError(400, "Unknown login type %s" % login_type) - # unknown username or invalid password. We raise a 403 here, but note - # that if we're doing user-interactive login, it turns all LoginErrors - # into a 401 anyway. + # unknown username or invalid password. + self._failed_attempts_ratelimiter.ratelimit( + qualified_user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) + + # We raise a 403 here, but note that if we're doing user-interactive + # login, it turns all LoginErrors into a 401 anyway. raise LoginError( 403, "Invalid password", errcode=Codes.FORBIDDEN ) @defer.inlineCallbacks + def check_password_provider_3pid(self, medium, address, password): + """Check if a password provider is able to validate a thirdparty login + + Args: + medium (str): The medium of the 3pid (ex. email). + address (str): The address of the 3pid (ex. jdoe@example.com). + password (str): The password of the user. + + Returns: + Deferred[(str|None, func|None)]: A tuple of `(user_id, + callback)`. If authentication is successful, `user_id` is a `str` + containing the authenticated, canonical user ID. `callback` is + then either a function to be later run after the server has + completed login/registration, or `None`. If authentication was + unsuccessful, `user_id` and `callback` are both `None`. + """ + for provider in self.password_providers: + if hasattr(provider, "check_3pid_auth"): + # This function is able to return a deferred that either + # resolves None, meaning authentication failure, or upon + # success, to a str (which is the user_id) or a tuple of + # (user_id, callback_func), where callback_func should be run + # after we've finished everything else + result = yield provider.check_3pid_auth( + medium, address, password, + ) + if result: + # Check if the return value is a str or a tuple + if isinstance(result, str): + # If it's a str, set callback function to None + result = (result, None) + defer.returnValue(result) + + defer.returnValue((None, None)) + + @defer.inlineCallbacks def _check_local_password(self, user_id, password): """Authenticate a user against the local password database. @@ -734,7 +792,12 @@ class AuthHandler(BaseHandler): user_id (unicode): complete @user:id password (unicode): the provided password Returns: - (unicode) the canonical_user_id, or None if unknown user / bad password + Deferred[unicode] the canonical_user_id, or Deferred[None] if + unknown user/bad password + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ lookupres = yield self._find_user_id_and_pwd_hash(user_id) if not lookupres: @@ -763,6 +826,7 @@ class AuthHandler(BaseHandler): auth_api.validate_macaroon(macaroon, "login", True, user_id) except Exception: raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) + self.ratelimit_login_per_account(user_id) yield self.auth.check_auth_blocking(user_id) defer.returnValue(user_id) @@ -934,6 +998,33 @@ class AuthHandler(BaseHandler): else: return defer.succeed(False) + def ratelimit_login_per_account(self, user_id): + """Checks whether the process must be stopped because of ratelimiting. + + Checks against two ratelimiters: the generic one for login attempts per + account and the one specific to failed attempts. + + Args: + user_id (unicode): complete @user:id + + Raises: + LimitExceededError if one of the ratelimiters' login requests count + for this user is too high too proceed. + """ + self._failed_attempts_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, + ) + + self._account_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_account.per_second, + burst_count=self.hs.config.rc_login_account.burst_count, + update=True, + ) + @attr.s class MacaroonGenerator(object): diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 75fe50c42c..97d3f31d98 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -164,6 +164,7 @@ class DeactivateAccountHandler(BaseHandler): room_id, "leave", ratelimit=False, + require_consent=False, ) except Exception: logger.exception( diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c708c35d4d..b398848079 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,13 +37,185 @@ from ._base import BaseHandler logger = logging.getLogger(__name__) -class DeviceHandler(BaseHandler): +class DeviceWorkerHandler(BaseHandler): def __init__(self, hs): - super(DeviceHandler, self).__init__(hs) + super(DeviceWorkerHandler, self).__init__(hs) self.hs = hs self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() + + @defer.inlineCallbacks + def get_devices_by_user(self, user_id): + """ + Retrieve the given user's devices + + Args: + user_id (str): + Returns: + defer.Deferred: list[dict[str, X]]: info on each device + """ + + device_map = yield self.store.get_devices_by_user(user_id) + + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id=None + ) + + devices = list(device_map.values()) + for device in devices: + _update_device_from_client_ips(device, ips) + + defer.returnValue(devices) + + @defer.inlineCallbacks + def get_device(self, user_id, device_id): + """ Retrieve the given device + + Args: + user_id (str): + device_id (str): + + Returns: + defer.Deferred: dict[str, X]: info on the device + Raises: + errors.NotFoundError: if the device was not found + """ + try: + device = yield self.store.get_device(user_id, device_id) + except errors.StoreError: + raise errors.NotFoundError + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id, + ) + _update_device_from_client_ips(device, ips) + defer.returnValue(device) + + @measure_func("device.get_user_ids_changed") + @defer.inlineCallbacks + def get_user_ids_changed(self, user_id, from_token): + """Get list of users that have had the devices updated, or have newly + joined a room, that `user_id` may be interested in. + + Args: + user_id (str) + from_token (StreamToken) + """ + now_room_key = yield self.store.get_room_events_max_id() + + room_ids = yield self.store.get_rooms_for_user(user_id) + + # First we check if any devices have changed + changed = yield self.store.get_user_whose_devices_changed( + from_token.device_list_key + ) + + # Then work out if any users have since joined + rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) + + member_events = yield self.store.get_membership_changes_for_user( + user_id, from_token.room_key, now_room_key, + ) + rooms_changed.update(event.room_id for event in member_events) + + stream_ordering = RoomStreamToken.parse_stream_token( + from_token.room_key + ).stream + + possibly_changed = set(changed) + possibly_left = set() + for room_id in rooms_changed: + current_state_ids = yield self.store.get_current_state_ids(room_id) + + # The user may have left the room + # TODO: Check if they actually did or if we were just invited. + if room_id not in room_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_left.add(state_key) + continue + + # Fetch the current state at the time. + try: + event_ids = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering=stream_ordering + ) + except errors.StoreError: + # we have purged the stream_ordering index since the stream + # ordering: treat it the same as a new room + event_ids = [] + + # special-case for an empty prev state: include all members + # in the changed list + if not event_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + continue + + current_member_id = current_state_ids.get((EventTypes.Member, user_id)) + if not current_member_id: + continue + + # mapping from event_id -> state_dict + prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) + + # Check if we've joined the room? If so we just blindly add all the users to + # the "possibly changed" users. + for state_dict in itervalues(prev_state_ids): + member_event = state_dict.get((EventTypes.Member, user_id), None) + if not member_event or member_event != current_member_id: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + break + + # If there has been any change in membership, include them in the + # possibly changed list. We'll check if they are joined below, + # and we're not toooo worried about spuriously adding users. + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + + # check if this member has changed since any of the extremities + # at the stream_ordering, and add them to the list if so. + for state_dict in itervalues(prev_state_ids): + prev_event_id = state_dict.get(key, None) + if not prev_event_id or prev_event_id != event_id: + if state_key != user_id: + possibly_changed.add(state_key) + break + + if possibly_changed or possibly_left: + users_who_share_room = yield self.store.get_users_who_share_room_with_user( + user_id + ) + + # Take the intersection of the users whose devices may have changed + # and those that actually still share a room with the user + possibly_joined = possibly_changed & users_who_share_room + possibly_left = (possibly_changed | possibly_left) - users_who_share_room + else: + possibly_joined = [] + possibly_left = [] + + defer.returnValue({ + "changed": list(possibly_joined), + "left": list(possibly_left), + }) + + +class DeviceHandler(DeviceWorkerHandler): + def __init__(self, hs): + super(DeviceHandler, self).__init__(hs) + self.federation_sender = hs.get_federation_sender() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -104,52 +276,6 @@ class DeviceHandler(BaseHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") @defer.inlineCallbacks - def get_devices_by_user(self, user_id): - """ - Retrieve the given user's devices - - Args: - user_id (str): - Returns: - defer.Deferred: list[dict[str, X]]: info on each device - """ - - device_map = yield self.store.get_devices_by_user(user_id) - - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id=None - ) - - devices = list(device_map.values()) - for device in devices: - _update_device_from_client_ips(device, ips) - - defer.returnValue(devices) - - @defer.inlineCallbacks - def get_device(self, user_id, device_id): - """ Retrieve the given device - - Args: - user_id (str): - device_id (str): - - Returns: - defer.Deferred: dict[str, X]: info on the device - Raises: - errors.NotFoundError: if the device was not found - """ - try: - device = yield self.store.get_device(user_id, device_id) - except errors.StoreError: - raise errors.NotFoundError - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id, - ) - _update_device_from_client_ips(device, ips) - defer.returnValue(device) - - @defer.inlineCallbacks def delete_device(self, user_id, device_id): """ Delete the given device @@ -276,6 +402,12 @@ class DeviceHandler(BaseHandler): user_id, device_ids, list(hosts) ) + for device_id in device_ids: + logger.debug( + "Notifying about update %r/%r, ID: %r", user_id, device_id, + position, + ) + room_ids = yield self.store.get_rooms_for_user(user_id) yield self.notifier.on_new_event( @@ -283,130 +415,10 @@ class DeviceHandler(BaseHandler): ) if hosts: - logger.info("Sending device list update notif to: %r", hosts) + logger.info("Sending device list update notif for %r to: %r", user_id, hosts) for host in hosts: self.federation_sender.send_device_messages(host) - @measure_func("device.get_user_ids_changed") - @defer.inlineCallbacks - def get_user_ids_changed(self, user_id, from_token): - """Get list of users that have had the devices updated, or have newly - joined a room, that `user_id` may be interested in. - - Args: - user_id (str) - from_token (StreamToken) - """ - now_token = yield self.hs.get_event_sources().get_current_token() - - room_ids = yield self.store.get_rooms_for_user(user_id) - - # First we check if any devices have changed - changed = yield self.store.get_user_whose_devices_changed( - from_token.device_list_key - ) - - # Then work out if any users have since joined - rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) - - member_events = yield self.store.get_membership_changes_for_user( - user_id, from_token.room_key, now_token.room_key - ) - rooms_changed.update(event.room_id for event in member_events) - - stream_ordering = RoomStreamToken.parse_stream_token( - from_token.room_key - ).stream - - possibly_changed = set(changed) - possibly_left = set() - for room_id in rooms_changed: - current_state_ids = yield self.store.get_current_state_ids(room_id) - - # The user may have left the room - # TODO: Check if they actually did or if we were just invited. - if room_id not in room_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_left.add(state_key) - continue - - # Fetch the current state at the time. - try: - event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_ordering=stream_ordering - ) - except errors.StoreError: - # we have purged the stream_ordering index since the stream - # ordering: treat it the same as a new room - event_ids = [] - - # special-case for an empty prev state: include all members - # in the changed list - if not event_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - continue - - current_member_id = current_state_ids.get((EventTypes.Member, user_id)) - if not current_member_id: - continue - - # mapping from event_id -> state_dict - prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) - - # Check if we've joined the room? If so we just blindly add all the users to - # the "possibly changed" users. - for state_dict in itervalues(prev_state_ids): - member_event = state_dict.get((EventTypes.Member, user_id), None) - if not member_event or member_event != current_member_id: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - break - - # If there has been any change in membership, include them in the - # possibly changed list. We'll check if they are joined below, - # and we're not toooo worried about spuriously adding users. - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - - # check if this member has changed since any of the extremities - # at the stream_ordering, and add them to the list if so. - for state_dict in itervalues(prev_state_ids): - prev_event_id = state_dict.get(key, None) - if not prev_event_id or prev_event_id != event_id: - if state_key != user_id: - possibly_changed.add(state_key) - break - - if possibly_changed or possibly_left: - users_who_share_room = yield self.store.get_users_who_share_room_with_user( - user_id - ) - - # Take the intersection of the users whose devices may have changed - # and those that actually still share a room with the user - possibly_joined = possibly_changed & users_who_share_room - possibly_left = (possibly_changed | possibly_left) - users_who_share_room - else: - possibly_joined = [] - possibly_left = [] - - defer.returnValue({ - "changed": list(possibly_joined), - "left": list(possibly_left), - }) - @defer.inlineCallbacks def on_federation_query_user_devices(self, user_id): stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id) @@ -473,15 +485,26 @@ class DeviceListEduUpdater(object): if get_domain_from_id(user_id) != origin: # TODO: Raise? - logger.warning("Got device list update edu for %r from %r", user_id, origin) + logger.warning( + "Got device list update edu for %r/%r from %r", + user_id, device_id, origin, + ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. + logger.warning( + "Got device list update edu for %r/%r, but don't share a room", + user_id, device_id, + ) return + logger.debug( + "Received device list update for %r/%r", user_id, device_id, + ) + self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) @@ -499,10 +522,18 @@ class DeviceListEduUpdater(object): # This can happen since we batch updates return + for device_id, stream_id, prev_ids, content in pending_updates: + logger.debug( + "Handling update %r/%r, ID: %r, prev: %r ", + user_id, device_id, stream_id, prev_ids, + ) + # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) + logger.debug("Need to re-sync devices for %r? %r", user_id, resync) + if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) @@ -555,11 +586,21 @@ class DeviceListEduUpdater(object): ) devices = [] + for device in devices: + logger.debug( + "Handling resync update %r/%r, ID: %r", + user_id, device["device_id"], stream_id, + ) + yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) + + # We clobber the seen updates since we've re-synced from a given + # point. + self._seen_updates[user_id] = set([stream_id]) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) @@ -572,9 +613,9 @@ class DeviceListEduUpdater(object): user_id, [device_id for device_id, _, _, _ in pending_updates] ) - self._seen_updates.setdefault(user_id, set()).update( - stream_id for _, stream_id, _, _ in pending_updates - ) + self._seen_updates.setdefault(user_id, set()).update( + stream_id for _, stream_id, _, _ in pending_updates + ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): @@ -587,6 +628,11 @@ class DeviceListEduUpdater(object): user_id ) + logger.debug( + "Current extremity for %r: %r", + user_id, extremity, + ) + stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8b113307d2..fe128d9c88 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -44,6 +44,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() self.config = hs.config + self.enable_room_list_search = hs.config.enable_room_list_search self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( @@ -411,6 +412,13 @@ class DirectoryHandler(BaseHandler): if visibility not in ["public", "private"]: raise SynapseError(400, "Invalid visibility setting") + if visibility == "public" and not self.enable_room_list_search: + # The room list has been disabled. + raise AuthError( + 403, + "This user is not permitted to publish rooms to the room list" + ) + room = yield self.store.get_room(room_id) if room is None: raise SynapseError(400, "Unknown room") diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index f772e62c28..d883e98381 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -19,7 +19,7 @@ import random from twisted.internet import defer from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError +from synapse.api.errors import AuthError, SynapseError from synapse.events import EventBase from synapse.events.utils import serialize_event from synapse.types import UserID @@ -61,6 +61,11 @@ class EventStreamHandler(BaseHandler): If `only_keys` is not None, events from keys will be sent down. """ + if room_id: + blocked = yield self.store.is_room_blocked(room_id) + if blocked: + raise SynapseError(403, "This room has been blocked on this server") + # send any outstanding server notices to the user. yield self._server_notices_sender.on_user_syncing(auth_user_id) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f80486102a..9eaf2d3e18 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import compute_event_signature +from synapse.event_auth import auth_types_for_event from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -858,6 +859,52 @@ class FederationHandler(BaseHandler): logger.debug("Not backfilling as no extremeties found.") return + # We only want to paginate if we can actually see the events we'll get, + # as otherwise we'll just spend a lot of resources to get redacted + # events. + # + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. + # + # *Note*: the spec wants us to keep backfilling until we reach the start + # of the room in case we are allowed to see some of the history. However + # in practice that causes more issues than its worth, as a) its + # relatively rare for there to be any visible history and b) even when + # there is its often sufficiently long ago that clients would stop + # attempting to paginate before backfill reached the visible history. + # + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. + # + # TODO: Correctly handle the case where we are allowed to see the + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. + + forward_events = yield self.store.get_successor_events( + list(extremities), + ) + + extremities_events = yield self.store.get_events( + forward_events, + check_redacted=False, + get_prev_content=False, + ) + + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. + filtered_extremities = yield filter_events_for_server( + self.store, self.server_name, list(extremities_events.values()), + redact=False, check_history_visibility_only=True, + ) + + if not filtered_extremities: + defer.returnValue(False) + # Check if we reached a point where we should start backfilling. sorted_extremeties_tuple = sorted( extremities.items(), @@ -1582,6 +1629,7 @@ class FederationHandler(BaseHandler): origin, event, state=state, auth_events=auth_events, + backfilled=backfilled, ) # reraise does not allow inlineCallbacks to preserve the stacktrace, so we @@ -1626,6 +1674,7 @@ class FederationHandler(BaseHandler): event, state=ev_info.get("state"), auth_events=ev_info.get("auth_events"), + backfilled=backfilled, ) defer.returnValue(res) @@ -1748,7 +1797,7 @@ class FederationHandler(BaseHandler): ) @defer.inlineCallbacks - def _prep_event(self, origin, event, state=None, auth_events=None): + def _prep_event(self, origin, event, state, auth_events, backfilled): """ Args: @@ -1756,6 +1805,7 @@ class FederationHandler(BaseHandler): event: state: auth_events: + backfilled (bool) Returns: Deferred, which resolves to synapse.events.snapshot.EventContext @@ -1797,12 +1847,100 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + if not context.rejected: + yield self._check_for_soft_fail(event, state, backfilled) + if event.type == EventTypes.GuestAccess and not context.rejected: yield self.maybe_kick_guest_users(event) defer.returnValue(context) @defer.inlineCallbacks + def _check_for_soft_fail(self, event, state, backfilled): + """Checks if we should soft fail the event, if so marks the event as + such. + + Args: + event (FrozenEvent) + state (dict|None): The state at the event if we don't have all the + event's prev events + backfilled (bool): Whether the event is from backfill + + Returns: + Deferred + """ + # For new (non-backfilled and non-outlier) events we check if the event + # passes auth based on the current state. If it doesn't then we + # "soft-fail" the event. + do_soft_fail_check = not backfilled and not event.internal_metadata.is_outlier() + if do_soft_fail_check: + extrem_ids = yield self.store.get_latest_event_ids_in_room( + event.room_id, + ) + + extrem_ids = set(extrem_ids) + prev_event_ids = set(event.prev_event_ids()) + + if extrem_ids == prev_event_ids: + # If they're the same then the current state is the same as the + # state at the event, so no point rechecking auth for soft fail. + do_soft_fail_check = False + + if do_soft_fail_check: + room_version = yield self.store.get_room_version(event.room_id) + + # Calculate the "current state". + if state is not None: + # If we're explicitly given the state then we won't have all the + # prev events, and so we have a gap in the graph. In this case + # we want to be a little careful as we might have been down for + # a while and have an incorrect view of the current state, + # however we still want to do checks as gaps are easy to + # maliciously manufacture. + # + # So we use a "current state" that is actually a state + # resolution across the current forward extremities and the + # given state at the event. This should correctly handle cases + # like bans, especially with state res v2. + + state_sets = yield self.store.get_state_groups( + event.room_id, extrem_ids, + ) + state_sets = list(state_sets.values()) + state_sets.append(state) + current_state_ids = yield self.state_handler.resolve_events( + room_version, state_sets, event, + ) + current_state_ids = { + k: e.event_id for k, e in iteritems(current_state_ids) + } + else: + current_state_ids = yield self.state_handler.get_current_state_ids( + event.room_id, latest_event_ids=extrem_ids, + ) + + # Now check if event pass auth against said current state + auth_types = auth_types_for_event(event) + current_state_ids = [ + e for k, e in iteritems(current_state_ids) + if k in auth_types + ] + + current_auth_events = yield self.store.get_events(current_state_ids) + current_auth_events = { + (e.type, e.state_key): e for e in current_auth_events.values() + } + + try: + self.auth.check(room_version, event, auth_events=current_auth_events) + except AuthError as e: + logger.warn( + "Failed current state auth resolution for %r because %s", + event, e, + ) + event.internal_metadata.soft_failed = True + + @defer.inlineCallbacks def on_query_auth(self, origin, event_id, room_id, remote_auth_chain, rejects, missing): in_room = yield self.auth.check_host_in_room( diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 563bb3cea3..7dfae78db0 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -18,7 +18,7 @@ import logging from twisted.internet import defer from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError, Codes +from synapse.api.errors import AuthError, Codes, SynapseError from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.handlers.presence import format_user_presence_state @@ -262,6 +262,10 @@ class InitialSyncHandler(BaseHandler): A JSON serialisable dict with the snapshot of the room. """ + blocked = yield self.store.is_room_blocked(room_id) + if blocked: + raise SynapseError(403, "This room has been blocked on this server") + user_id = requester.user.to_string() membership, member_event_id = yield self._check_in_room_or_world_readable( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3981fe69ce..9b41c7b205 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -243,12 +243,19 @@ class EventCreationHandler(object): self.spam_checker = hs.get_spam_checker() - if self.config.block_events_without_consent_error is not None: + self._block_events_without_consent_error = ( + self.config.block_events_without_consent_error + ) + + # we need to construct a ConsentURIBuilder here, as it checks that the necessary + # config options, but *only* if we have a configuration for which we are + # going to need it. + if self._block_events_without_consent_error: self._consent_uri_builder = ConsentURIBuilder(self.config) @defer.inlineCallbacks def create_event(self, requester, event_dict, token_id=None, txn_id=None, - prev_events_and_hashes=None): + prev_events_and_hashes=None, require_consent=True): """ Given a dict from a client, create a new event. @@ -269,6 +276,9 @@ class EventCreationHandler(object): where *hashes* is a map from algorithm to hash. If None, they will be requested from the database. + + require_consent (bool): Whether to check if the requester has + consented to privacy policy. Raises: ResourceLimitError if server is blocked to some resource being exceeded @@ -310,7 +320,7 @@ class EventCreationHandler(object): ) is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if not is_exempt: + if require_consent and not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: @@ -378,7 +388,7 @@ class EventCreationHandler(object): Raises: ConsentNotGivenError: if the user has not given consent yet """ - if self.config.block_events_without_consent_error is None: + if self._block_events_without_consent_error is None: return # exempt AS users from needing consent @@ -405,7 +415,7 @@ class EventCreationHandler(object): consent_uri = self._consent_uri_builder.build_user_consent_uri( requester.user.localpart, ) - msg = self.config.block_events_without_consent_error % { + msg = self._block_events_without_consent_error % { 'consent_uri': consent_uri, } raise ConsentNotGivenError( @@ -436,10 +446,11 @@ class EventCreationHandler(object): if event.is_state(): prev_state = yield self.deduplicate_state_event(event, context) - logger.info( - "Not bothering to persist duplicate state event %s", event.event_id, - ) if prev_state is not None: + logger.info( + "Not bothering to persist state event %s duplicated by %s", + event.event_id, prev_state.event_id, + ) defer.returnValue(prev_state) yield self.handle_new_client_event( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ba3856674d..37e87fc054 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -816,7 +816,7 @@ class PresenceHandler(object): if self.is_mine(observed_user): yield self.invite_presence(observed_user, observer_user) else: - yield self.federation.send_edu( + yield self.federation.build_and_send_edu( destination=observed_user.domain, edu_type="m.presence_invite", content={ @@ -836,7 +836,7 @@ class PresenceHandler(object): if self.is_mine(observer_user): yield self.accept_presence(observed_user, observer_user) else: - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence_accept", content={ @@ -848,7 +848,7 @@ class PresenceHandler(object): state_dict = yield self.get_state(observed_user, as_event=False) state_dict = format_user_presence_state(state_dict, self.clock.time_msec()) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence", content={ diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 1dfbde84fd..a65c98ff5c 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -147,8 +147,14 @@ class BaseProfileHandler(BaseHandler): @defer.inlineCallbacks def set_displayname(self, target_user, requester, new_displayname, by_admin=False): - """target_user is the user whose displayname is to be changed; - auth_user is the user attempting to make this change.""" + """Set the displayname of a user + + Args: + target_user (UserID): the user whose displayname is to be changed. + requester (Requester): The user attempting to make this change. + new_displayname (str): The displayname to give this user. + by_admin (bool): Whether this change was made by an administrator. + """ if not self.hs.is_mine(target_user): raise SynapseError(400, "User is not hosted on this Home Server") diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 696469732c..274d2946ad 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,10 +16,8 @@ import logging from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import get_domain_from_id - -from ._base import BaseHandler +from synapse.handlers._base import BaseHandler +from synapse.types import ReadReceipt logger = logging.getLogger(__name__) @@ -39,42 +37,17 @@ class ReceiptsHandler(BaseHandler): self.state = hs.get_state_handler() @defer.inlineCallbacks - def received_client_receipt(self, room_id, receipt_type, user_id, - event_id): - """Called when a client tells us a local user has read up to the given - event_id in the room. - """ - receipt = { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": [event_id], - "data": { - "ts": int(self.clock.time_msec()), - } - } - - is_new = yield self._handle_new_receipts([receipt]) - - if is_new: - # fire off a process in the background to send the receipt to - # remote servers - run_as_background_process( - 'push_receipts_to_remotes', self._push_remotes, receipt - ) - - @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): """Called when we receive an EDU of type m.receipt from a remote HS. """ receipts = [ - { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": user_values["event_ids"], - "data": user_values.get("data", {}), - } + ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=user_values["event_ids"], + data=user_values.get("data", {}), + ) for room_id, room_values in content.items() for receipt_type, users in room_values.items() for user_id, user_values in users.items() @@ -90,14 +63,12 @@ class ReceiptsHandler(BaseHandler): max_batch_id = None for receipt in receipts: - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - res = yield self.store.insert_receipt( - room_id, receipt_type, user_id, event_ids, data + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + receipt.event_ids, + receipt.data, ) if not res: @@ -115,7 +86,7 @@ class ReceiptsHandler(BaseHandler): # no new receipts defer.returnValue(False) - affected_room_ids = list(set([r["room_id"] for r in receipts])) + affected_room_ids = list(set([r.room_id for r in receipts])) self.notifier.on_new_event( "receipt_key", max_batch_id, rooms=affected_room_ids @@ -128,43 +99,26 @@ class ReceiptsHandler(BaseHandler): defer.returnValue(True) @defer.inlineCallbacks - def _push_remotes(self, receipt): - """Given a receipt, works out which remote servers should be - poked and pokes them. + def received_client_receipt(self, room_id, receipt_type, user_id, + event_id): + """Called when a client tells us a local user has read up to the given + event_id in the room. """ - try: - # TODO: optimise this to move some of the work to the workers. - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": event_ids, - "data": data, - } - } - }, - }, - key=(room_id, receipt_type, user_id), - ) - except Exception: - logger.exception("Error pushing receipts to remote servers") + receipt = ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=[event_id], + data={ + "ts": int(self.clock.time_msec()), + }, + ) + + is_new = yield self._handle_new_receipts([receipt]) + if not is_new: + return + + yield self.federation.send_read_receipt(receipt) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c0e06929bd..58940e0320 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -23,7 +23,9 @@ from synapse.api.constants import LoginType from synapse.api.errors import ( AuthError, Codes, + ConsentNotGivenError, InvalidCaptchaError, + LimitExceededError, RegistrationError, SynapseError, ) @@ -60,6 +62,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler + self.ratelimiter = hs.get_registration_ratelimiter() self._next_generated_user_id = None @@ -149,6 +152,7 @@ class RegistrationHandler(BaseHandler): threepid=None, user_type=None, default_display_name=None, + address=None, ): """Registers a new client on the server. @@ -167,6 +171,7 @@ class RegistrationHandler(BaseHandler): api.constants.UserTypes, or None for a normal user. default_display_name (unicode|None): if set, the new user's displayname will be set to this. Defaults to 'localpart'. + address (str|None): the IP address used to perform the registration. Returns: A tuple of (user_id, access_token). Raises: @@ -206,7 +211,7 @@ class RegistrationHandler(BaseHandler): token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -215,6 +220,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=default_display_name, admin=admin, user_type=user_type, + address=address, ) if self.hs.config.user_directory_search_all_users: @@ -238,12 +244,13 @@ class RegistrationHandler(BaseHandler): if default_display_name is None: default_display_name = localpart try: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, make_guest=make_guest, create_profile_with_displayname=default_display_name, + address=address, ) except SynapseError: # if user id is taken, just generate another @@ -305,6 +312,10 @@ class RegistrationHandler(BaseHandler): ) else: yield self._join_user_to_room(fake_requester, r) + except ConsentNotGivenError as e: + # Technically not necessary to pull out this error though + # moving away from bare excepts is a good thing to do. + logger.error("Failed to join new user to %r: %r", r, e) except Exception as e: logger.error("Failed to join new user to %r: %r", r, e) @@ -337,7 +348,7 @@ class RegistrationHandler(BaseHandler): user_id, allowed_appservice=service ) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, password_hash="", appservice_id=service_id, @@ -513,7 +524,7 @@ class RegistrationHandler(BaseHandler): token = self.macaroon_gen.generate_access_token(user_id) if need_register: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -590,10 +601,10 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) - def _register_with_store(self, user_id, token=None, password_hash=None, - was_guest=False, make_guest=False, appservice_id=None, - create_profile_with_displayname=None, admin=False, - user_type=None): + def register_with_store(self, user_id, token=None, password_hash=None, + was_guest=False, make_guest=False, appservice_id=None, + create_profile_with_displayname=None, admin=False, + user_type=None, address=None): """Register user in the datastore. Args: @@ -612,10 +623,26 @@ class RegistrationHandler(BaseHandler): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the registration. Returns: Deferred """ + # Don't rate limit for app services + if appservice_id is None and address is not None: + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + address, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + if self.hs.config.worker_app: return self._register_client( user_id=user_id, @@ -627,6 +654,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=create_profile_with_displayname, admin=admin, user_type=user_type, + address=address, ) else: return self.store.register( @@ -693,9 +721,9 @@ class RegistrationHandler(BaseHandler): access_token (str|None): The access token of the newly logged in device, or None if `inhibit_login` enabled. bind_email (bool): Whether to bind the email with the identity - server + server. bind_msisdn (bool): Whether to bind the msisdn with the identity - server + server. """ if self.hs.config.worker_app: yield self._post_registration_client( @@ -737,7 +765,7 @@ class RegistrationHandler(BaseHandler): """A user consented to the terms on registration Args: - user_id (str): The user ID that consented + user_id (str): The user ID that consented. consent_version (str): version of the policy the user has consented to. """ diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index afa508d729..d6c9d56007 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -44,6 +44,7 @@ EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) class RoomListHandler(BaseHandler): def __init__(self, hs): super(RoomListHandler, self).__init__(hs) + self.enable_room_list_search = hs.config.enable_room_list_search self.response_cache = ResponseCache(hs, "room_list") self.remote_response_cache = ResponseCache(hs, "remote_room_list", timeout_ms=30 * 1000) @@ -66,10 +67,17 @@ class RoomListHandler(BaseHandler): appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. """ + if not self.enable_room_list_search: + return defer.succeed({ + "chunk": [], + "total_room_count_estimate": 0, + }) + logger.info( "Getting public room list: limit=%r, since=%r, search=%r, network=%r", limit, since_token, bool(search_filter), network_tuple, ) + if search_filter: # We explicitly don't bother caching searches or requests for # appservice specific lists. @@ -441,6 +449,12 @@ class RoomListHandler(BaseHandler): def get_remote_public_room_list(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): + if not self.enable_room_list_search: + defer.returnValue({ + "chunk": [], + "total_room_count_estimate": 0, + }) + if search_filter: # We currently don't support searching across federation, so we have # to do it manually without pagination diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 190ea2c7b1..71ce5b54e5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -160,6 +160,7 @@ class RoomMemberHandler(object): txn_id=None, ratelimit=True, content=None, + require_consent=True, ): user_id = target.to_string() @@ -185,6 +186,7 @@ class RoomMemberHandler(object): token_id=requester.access_token_id, txn_id=txn_id, prev_events_and_hashes=prev_events_and_hashes, + require_consent=require_consent, ) # Check if this event matches the previous membership event for the user. @@ -232,6 +234,10 @@ class RoomMemberHandler(object): self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id, ) + # Move over old push rules + self.store.move_push_rules_from_room_to_room_for_user( + predecessor["room_id"], room_id, user_id, + ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) @@ -301,6 +307,7 @@ class RoomMemberHandler(object): third_party_signed=None, ratelimit=True, content=None, + require_consent=True, ): key = (room_id,) @@ -315,6 +322,7 @@ class RoomMemberHandler(object): third_party_signed=third_party_signed, ratelimit=ratelimit, content=content, + require_consent=require_consent, ) defer.returnValue(result) @@ -331,6 +339,7 @@ class RoomMemberHandler(object): third_party_signed=None, ratelimit=True, content=None, + require_consent=True, ): content_specified = bool(content) if content is None: @@ -512,6 +521,7 @@ class RoomMemberHandler(object): ratelimit=ratelimit, prev_events_and_hashes=prev_events_and_hashes, content=content, + require_consent=require_consent, ) defer.returnValue(res) diff --git a/synapse/handlers/state_deltas.py b/synapse/handlers/state_deltas.py new file mode 100644 index 0000000000..b268bbcb2c --- /dev/null +++ b/synapse/handlers/state_deltas.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +logger = logging.getLogger(__name__) + + +class StateDeltasHandler(object): + + def __init__(self, hs): + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def _get_key_change(self, prev_event_id, event_id, key_name, public_value): + """Given two events check if the `key_name` field in content changed + from not matching `public_value` to doing so. + + For example, check if `history_visibility` (`key_name`) changed from + `shared` to `world_readable` (`public_value`). + + Returns: + None if the field in the events either both match `public_value` + or if neither do, i.e. there has been no change. + True if it didnt match `public_value` but now does + False if it did match `public_value` but now doesn't + """ + prev_event = None + event = None + if prev_event_id: + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + + if event_id: + event = yield self.store.get_event(event_id, allow_none=True) + + if not event and not prev_event: + logger.debug("Neither event exists: %r %r", prev_event_id, event_id) + defer.returnValue(None) + + prev_value = None + value = None + + if prev_event: + prev_value = prev_event.content.get(key_name) + + if event: + value = event.content.get(key_name) + + logger.debug("prev_value: %r -> value: %r", prev_value, value) + + if value == public_value and prev_value != public_value: + defer.returnValue(True) + elif value != public_value and prev_value == public_value: + defer.returnValue(False) + else: + defer.returnValue(None) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd97241ab4..57bb996245 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -39,6 +39,9 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Debug logger for https://github.com/matrix-org/synapse/issues/4422 +issue4422_logger = logging.getLogger("synapse.handler.sync.4422_debug") + # Counts the number of times we returned a non-empty sync. `type` is one of # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is @@ -962,6 +965,15 @@ class SyncHandler(object): yield self._generate_sync_entry_for_groups(sync_result_builder) + # debug for https://github.com/matrix-org/synapse/issues/4422 + for joined_room in sync_result_builder.joined: + room_id = joined_room.room_id + if room_id in newly_joined_rooms: + issue4422_logger.debug( + "Sync result for newly joined room %s: %r", + room_id, joined_room, + ) + defer.returnValue(SyncResult( presence=sync_result_builder.presence, account_data=sync_result_builder.account_data, @@ -1425,6 +1437,17 @@ class SyncHandler(object): old_mem_ev = yield self.store.get_event( old_mem_ev_id, allow_none=True ) + + # debug for #4422 + if has_join: + prev_membership = None + if old_mem_ev: + prev_membership = old_mem_ev.membership + issue4422_logger.debug( + "Previous membership for room %s with join: %s (event %s)", + room_id, prev_membership, old_mem_ev_id, + ) + if not old_mem_ev or old_mem_ev.membership != Membership.JOIN: newly_joined_rooms.append(room_id) @@ -1519,30 +1542,39 @@ class SyncHandler(object): for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) + newly_joined = room_id in newly_joined_rooms if room_entry: events, start_key = room_entry prev_batch_token = now_token.copy_and_replace("room_key", start_key) - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=events, - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, - since_token=None if room_id in newly_joined_rooms else since_token, + since_token=None if newly_joined else since_token, upto_token=prev_batch_token, - )) + ) else: - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=[], - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, since_token=since_token, upto_token=since_token, - )) + ) + + if newly_joined: + # debugging for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "RoomSyncResultBuilder events for newly joined room %s: %r", + room_id, entry.events, + ) + room_entries.append(entry) defer.returnValue((room_entries, invited, newly_joined_rooms, newly_left_rooms)) @@ -1663,6 +1695,13 @@ class SyncHandler(object): newly_joined_room=newly_joined, ) + if newly_joined: + # debug for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "Timeline events after filtering in newly-joined room %s: %r", + room_id, batch, + ) + # When we join the room (or the client requests full_state), we should # send down any existing tags. Usually the user won't have tags in a # newly joined room, unless either a) they've joined before or b) the @@ -1894,15 +1933,34 @@ def _calculate_state( class SyncResultBuilder(object): - "Used to help build up a new SyncResult for a user" + """Used to help build up a new SyncResult for a user + + Attributes: + sync_config (SyncConfig) + full_state (bool) + since_token (StreamToken) + now_token (StreamToken) + joined_room_ids (list[str]) + + # The following mirror the fields in a sync response + presence (list) + account_data (list) + joined (list[JoinedSyncResult]) + invited (list[InvitedSyncResult]) + archived (list[ArchivedSyncResult]) + device (list) + groups (GroupsSyncResult|None) + to_device (list) + """ def __init__(self, sync_config, full_state, since_token, now_token, joined_room_ids): """ Args: - sync_config(SyncConfig) - full_state(bool): The full_state flag as specified by user - since_token(StreamToken): The token supplied by user, or None. - now_token(StreamToken): The token to sync up to. + sync_config (SyncConfig) + full_state (bool): The full_state flag as specified by user + since_token (StreamToken): The token supplied by user, or None. + now_token (StreamToken): The token to sync up to. + joined_room_ids (list[str]): List of rooms the user is joined to """ self.sync_config = sync_config self.full_state = full_state @@ -1930,8 +1988,8 @@ class RoomSyncResultBuilder(object): Args: room_id(str) rtype(str): One of `"joined"` or `"archived"` - events(list): List of events to include in the room, (more events - may be added when generating result). + events(list[FrozenEvent]): List of events to include in the room + (more events may be added when generating result). newly_joined(bool): If the user has newly joined the room full_state(bool): Whether the full state should be sent in result since_token(StreamToken): Earliest point to return events from, or None diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index a61bbf9392..39df960c31 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -231,7 +231,7 @@ class TypingHandler(object): for domain in set(get_domain_from_id(u) for u in users): if domain != self.server_name: logger.debug("sending typing update to %s", domain) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=domain, edu_type="m.typing", content={ diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 283c6c1b81..b689979b4b 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -15,12 +15,13 @@ import logging -from six import iteritems +from six import iteritems, iterkeys from twisted.internet import defer import synapse.metrics from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.handlers.state_deltas import StateDeltasHandler from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.roommember import ProfileInfo from synapse.types import get_localpart_from_id @@ -29,7 +30,7 @@ from synapse.util.metrics import Measure logger = logging.getLogger(__name__) -class UserDirectoryHandler(object): +class UserDirectoryHandler(StateDeltasHandler): """Handles querying of and keeping updated the user_directory. N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY @@ -38,19 +39,11 @@ class UserDirectoryHandler(object): world_readable or publically joinable room. We keep a database table up to date by streaming changes of the current state and recalculating whether users should be in the directory or not when necessary. - - For each user in the directory we also store a room_id which is public and that the - user is joined to. This allows us to ignore history_visibility and join_rules changes - for that user in all other public rooms, as we know they'll still be in at least - one public room. """ - INITIAL_ROOM_SLEEP_MS = 50 - INITIAL_ROOM_SLEEP_COUNT = 100 - INITIAL_ROOM_BATCH_SIZE = 100 - INITIAL_USER_SLEEP_MS = 10 - def __init__(self, hs): + super(UserDirectoryHandler, self).__init__(hs) + self.store = hs.get_datastore() self.state = hs.get_state_handler() self.server_name = hs.hostname @@ -59,15 +52,6 @@ class UserDirectoryHandler(object): self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users - - # When start up for the first time we need to populate the user_directory. - # This is a set of user_id's we've inserted already - self.initially_handled_users = set() - self.initially_handled_users_in_public = set() - - self.initially_handled_users_share = set() - self.initially_handled_users_share_private_room = set() - # The current position in the current_state_delta stream self.pos = None @@ -130,7 +114,7 @@ class UserDirectoryHandler(object): # Support users are for diagnostics and should not appear in the user directory. if not is_support: yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None + user_id, profile.display_name, profile.avatar_url ) @defer.inlineCallbacks @@ -140,7 +124,6 @@ class UserDirectoryHandler(object): # FIXME(#3714): We should probably do this in the same worker as all # the other changes. yield self.store.remove_from_user_dir(user_id) - yield self.store.remove_from_user_in_public_room(user_id) @defer.inlineCallbacks def _unsafe_process(self): @@ -148,10 +131,9 @@ class UserDirectoryHandler(object): if self.pos is None: self.pos = yield self.store.get_user_directory_stream_pos() - # If still None then we need to do the initial fill of directory + # If still None then the initial background update hasn't happened yet if self.pos is None: - yield self._do_initial_spam() - self.pos = yield self.store.get_user_directory_stream_pos() + defer.returnValue(None) # Loop round handling deltas until we're up to date while True: @@ -173,149 +155,6 @@ class UserDirectoryHandler(object): yield self.store.update_user_directory_stream_pos(self.pos) @defer.inlineCallbacks - def _do_initial_spam(self): - """Populates the user_directory from the current state of the DB, used - when synapse first starts with user_directory support - """ - new_pos = yield self.store.get_max_stream_id_in_current_state_deltas() - - # Delete any existing entries just in case there are any - yield self.store.delete_all_from_user_dir() - - # We process by going through each existing room at a time. - room_ids = yield self.store.get_all_rooms() - - logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) - num_processed_rooms = 0 - - for room_id in room_ids: - logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) - yield self._handle_initial_room(room_id) - num_processed_rooms += 1 - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - logger.info("Processed all rooms.") - - if self.search_all_users: - num_processed_users = 0 - user_ids = yield self.store.get_all_local_users() - logger.info( - "Doing initial update of user directory. %d users", len(user_ids) - ) - for user_id in user_ids: - # We add profiles for all users even if they don't match the - # include pattern, just in case we want to change it in future - logger.info( - "Handling user %d/%d", num_processed_users + 1, len(user_ids) - ) - yield self._handle_local_user(user_id) - num_processed_users += 1 - yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0) - - logger.info("Processed all users") - - self.initially_handled_users = None - self.initially_handled_users_in_public = None - self.initially_handled_users_share = None - self.initially_handled_users_share_private_room = None - - yield self.store.update_user_directory_stream_pos(new_pos) - - @defer.inlineCallbacks - def _handle_initial_room(self, room_id): - """Called when we initially fill out user_directory one room at a time - """ - is_in_room = yield self.store.is_host_joined(room_id, self.server_name) - if not is_in_room: - return - - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - user_ids = set(users_with_profile) - unhandled_users = user_ids - self.initially_handled_users - - yield self.store.add_profiles_to_user_dir( - room_id, - {user_id: users_with_profile[user_id] for user_id in unhandled_users}, - ) - - self.initially_handled_users |= unhandled_users - - if is_public: - yield self.store.add_users_to_public_room( - room_id, user_ids=user_ids - self.initially_handled_users_in_public - ) - self.initially_handled_users_in_public |= user_ids - - # We now go and figure out the new users who share rooms with user entries - # We sleep aggressively here as otherwise it can starve resources. - # We also batch up inserts/updates, but try to avoid too many at once. - to_insert = set() - to_update = set() - count = 0 - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - for other_user_id in user_ids: - if user_id == other_user_id: - continue - - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - - user_set = (user_id, other_user_id) - - if user_set in self.initially_handled_users_share_private_room: - continue - - if user_set in self.initially_handled_users_share: - if is_public: - continue - to_update.add(user_set) - else: - to_insert.add(user_set) - - if is_public: - self.initially_handled_users_share.add(user_set) - else: - self.initially_handled_users_share_private_room.add(user_set) - - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert - ) - to_insert.clear() - - if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - to_insert.clear() - - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - - @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process """ @@ -356,6 +195,7 @@ class UserDirectoryHandler(object): user_ids = yield self.store.get_users_in_dir_due_to_room( room_id ) + for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return @@ -436,14 +276,20 @@ class UserDirectoryHandler(object): # ignore the change return - if change: - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in iteritems(users_with_profile): - yield self._handle_new_user(room_id, user_id, profile) - else: - users = yield self.store.get_users_in_public_due_to_room(room_id) - for user_id in users: - yield self._handle_remove_user(room_id, user_id) + users_with_profile = yield self.state.get_current_user_in_room(room_id) + + # Remove every user from the sharing tables for that room. + for user_id in iterkeys(users_with_profile): + yield self.store.remove_user_who_share_room(user_id, room_id) + + # Then, re-add them to the tables. + # NOTE: this is not the most efficient method, as handle_new_user sets + # up local_user -> other_user and other_user_whos_local -> local_user, + # which when ran over an entire room, will result in the same values + # being added multiple times. The batching upserts shouldn't make this + # too bad, though. + for user_id, profile in iteritems(users_with_profile): + yield self._handle_new_user(room_id, user_id, profile) @defer.inlineCallbacks def _handle_local_user(self, user_id): @@ -457,7 +303,9 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): @@ -469,177 +317,68 @@ class UserDirectoryHandler(object): """ logger.debug("Adding new user to dir, %r", user_id) - row = yield self.store.get_user_in_directory(user_id) - if not row: - yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id ) + # Now we update users who share rooms with users. + users_with_profile = yield self.state.get_current_user_in_room(room_id) if is_public: - row = yield self.store.get_user_in_public_room(user_id) - if not row: - yield self.store.add_users_to_public_room(room_id, [user_id]) + yield self.store.add_users_in_public_rooms(room_id, (user_id,)) else: - logger.debug("Not adding new user to public dir, %r", user_id) + to_insert = set() - # Now we update users who share rooms with users. We do this by getting - # all the current users in the room and seeing which aren't already - # marked in the database as sharing with `user_id` + # First, if they're our user then we need to update for every user + if self.is_mine_id(user_id): - users_with_profile = yield self.state.get_current_user_in_room(room_id) - - to_insert = set() - to_update = set() + is_appservice = self.store.get_if_app_services_interested_in_user( + user_id + ) - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + # We don't care about appservice users. + if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id) and not is_appservice: - # Returns a map of other_user_id -> shared_private. We only need - # to update mappings if for users that either don't share a room - # already (aren't in the map) or, if the room is private, those that - # only share a public room. - user_ids_shared = yield self.store.get_users_who_share_room_from_dir( - user_id - ) + to_insert.add((user_id, other_user_id)) + # Next we need to update for every local user in the room for other_user_id in users_with_profile: if user_id == other_user_id: continue - shared_is_private = user_ids_shared.get(other_user_id) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((user_id, other_user_id)) - elif shared_is_private is None: - # This is the first time they both share a room - to_insert.add((user_id, other_user_id)) - - # Next we need to update for every local user in the room - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue - - is_appservice = self.store.get_if_app_services_interested_in_user( - other_user_id - ) - if self.is_mine_id(other_user_id) and not is_appservice: - shared_is_private = yield self.store.get_if_users_share_a_room( - other_user_id, user_id + is_appservice = self.store.get_if_app_services_interested_in_user( + other_user_id ) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((other_user_id, user_id)) - elif shared_is_private is None: - # This is the first time they both share a room + if self.is_mine_id(other_user_id) and not is_appservice: to_insert.add((other_user_id, user_id)) - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): - """Called when we might need to remove user to directory + """Called when we might need to remove user from directory Args: room_id (str): room_id that user left or stopped being public that user_id (str) """ - logger.debug("Maybe removing user %r", user_id) - - row = yield self.store.get_user_in_directory(user_id) - update_user_dir = row and row["room_id"] == room_id - - row = yield self.store.get_user_in_public_room(user_id) - update_user_in_public = row and row["room_id"] == room_id - - if update_user_in_public or update_user_dir: - # XXX: Make this faster? - rooms = yield self.store.get_rooms_for_user(user_id) - for j_room_id in rooms: - if not update_user_in_public and not update_user_dir: - break - - is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name - ) - - if not is_in_room: - continue + logger.debug("Removing user %r", user_id) - if update_user_dir: - update_user_dir = False - yield self.store.update_user_in_user_dir(user_id, j_room_id) + # Remove user from sharing tables + yield self.store.remove_user_who_share_room(user_id, room_id) - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) + # Are they still in any rooms? If not, remove them entirely. + rooms_user_is_in = yield self.store.get_user_dir_rooms_user_is_in(user_id) - if update_user_in_public and is_public: - yield self.store.update_user_in_public_user_list(user_id, j_room_id) - update_user_in_public = False - - if update_user_dir: + if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) - elif update_user_in_public: - yield self.store.remove_from_user_in_public_room(user_id) - - # Now handle users_who_share_rooms. - - # Get a list of user tuples that were in the DB due to this room and - # users (this includes tuples where the other user matches `user_id`) - user_tuples = yield self.store.get_users_in_share_dir_with_room_id( - user_id, room_id - ) - - for user_id, other_user_id in user_tuples: - # For each user tuple get a list of rooms that they still share, - # trying to find a private room, and update the entry in the DB - rooms = yield self.store.get_rooms_in_common_for_users( - user_id, other_user_id - ) - - # If they dont share a room anymore, remove the mapping - if not rooms: - yield self.store.remove_user_who_share_room(user_id, other_user_id) - continue - - found_public_share = None - for j_room_id in rooms: - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if is_public: - found_public_share = j_room_id - else: - found_public_share = None - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) - break - - if found_public_share: - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) @defer.inlineCallbacks def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id): @@ -665,50 +404,4 @@ class UserDirectoryHandler(object): new_avatar = event.content.get("avatar_url") if prev_name != new_name or prev_avatar != new_avatar: - yield self.store.update_profile_in_user_dir( - user_id, new_name, new_avatar, room_id - ) - - @defer.inlineCallbacks - def _get_key_change(self, prev_event_id, event_id, key_name, public_value): - """Given two events check if the `key_name` field in content changed - from not matching `public_value` to doing so. - - For example, check if `history_visibility` (`key_name`) changed from - `shared` to `world_readable` (`public_value`). - - Returns: - None if the field in the events either both match `public_value` - or if neither do, i.e. there has been no change. - True if it didnt match `public_value` but now does - False if it did match `public_value` but now doesn't - """ - prev_event = None - event = None - if prev_event_id: - prev_event = yield self.store.get_event(prev_event_id, allow_none=True) - - if event_id: - event = yield self.store.get_event(event_id, allow_none=True) - - if not event and not prev_event: - logger.debug("Neither event exists: %r %r", prev_event_id, event_id) - defer.returnValue(None) - - prev_value = None - value = None - - if prev_event: - prev_value = prev_event.content.get(key_name) - - if event: - value = event.content.get(key_name) - - logger.debug("prev_value: %r -> value: %r", prev_value, value) - - if value == public_value and prev_value != public_value: - defer.returnValue(True) - elif value != public_value and prev_value == public_value: - defer.returnValue(False) - else: - defer.returnValue(None) + yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 384d8a37a2..1334c630cc 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -68,9 +68,13 @@ class MatrixFederationAgent(object): TLS policy to use for fetching .well-known files. None to use a default (browser-like) implementation. - srv_resolver (SrvResolver|None): + _srv_resolver (SrvResolver|None): SRVResolver impl to use for looking up SRV records. None to use a default implementation. + + _well_known_cache (TTLCache|None): + TTLCache impl for storing cached well-known lookups. None to use a default + implementation. """ def __init__( diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 1682c9af13..ff63d0b2a8 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -189,6 +189,58 @@ class MatrixFederationHttpClient(object): self._cooperator = Cooperator(scheduler=schedule) @defer.inlineCallbacks + def _send_request_with_optional_trailing_slash( + self, + request, + try_trailing_slash_on_400=False, + **send_request_args + ): + """Wrapper for _send_request which can optionally retry the request + upon receiving a combination of a 400 HTTP response code and a + 'M_UNRECOGNIZED' errcode. This is a workaround for Synapse <= v0.99.3 + due to #3622. + + Args: + request (MatrixFederationRequest): details of request to be sent + try_trailing_slash_on_400 (bool): Whether on receiving a 400 + 'M_UNRECOGNIZED' from the server to retry the request with a + trailing slash appended to the request path. + send_request_args (Dict): A dictionary of arguments to pass to + `_send_request()`. + + Raises: + HttpResponseException: If we get an HTTP response code >= 300 + (except 429). + + Returns: + Deferred[Dict]: Parsed JSON response body. + """ + try: + response = yield self._send_request( + request, **send_request_args + ) + except HttpResponseException as e: + # Received an HTTP error > 300. Check if it meets the requirements + # to retry with a trailing slash + if not try_trailing_slash_on_400: + raise + + if e.code != 400 or e.to_synapse_error().errcode != "M_UNRECOGNIZED": + raise + + # Retry with a trailing slash if we received a 400 with + # 'M_UNRECOGNIZED' which some endpoints can return when omitting a + # trailing slash on Synapse <= v0.99.3. + logger.info("Retrying request with trailing slash") + request.path += "/" + + response = yield self._send_request( + request, **send_request_args + ) + + defer.returnValue(response) + + @defer.inlineCallbacks def _send_request( self, request, @@ -196,7 +248,7 @@ class MatrixFederationHttpClient(object): timeout=None, long_retries=False, ignore_backoff=False, - backoff_on_404=False + backoff_on_404=False, ): """ Sends a request to the given server. @@ -473,7 +525,8 @@ class MatrixFederationHttpClient(object): json_data_callback=None, long_retries=False, timeout=None, ignore_backoff=False, - backoff_on_404=False): + backoff_on_404=False, + try_trailing_slash_on_400=False): """ Sends the specifed json data using PUT Args: @@ -493,7 +546,12 @@ class MatrixFederationHttpClient(object): and try the request anyway. backoff_on_404 (bool): True if we should count a 404 response as a failure of the server (and should therefore back off future - requests) + requests). + try_trailing_slash_on_400 (bool): True if on a 400 M_UNRECOGNIZED + response we should try appending a trailing slash to the end + of the request. Workaround for #3622 in Synapse <= v0.99.3. This + will be attempted before backing off if backing off has been + enabled. Returns: Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The @@ -509,7 +567,6 @@ class MatrixFederationHttpClient(object): RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. """ - request = MatrixFederationRequest( method="PUT", destination=destination, @@ -519,17 +576,19 @@ class MatrixFederationHttpClient(object): json=data, ) - response = yield self._send_request( + response = yield self._send_request_with_optional_trailing_slash( request, + try_trailing_slash_on_400, + backoff_on_404=backoff_on_404, + ignore_backoff=ignore_backoff, long_retries=long_retries, timeout=timeout, - ignore_backoff=ignore_backoff, - backoff_on_404=backoff_on_404, ) body = yield _handle_json_response( self.hs.get_reactor(), self.default_timeout, request, response, ) + defer.returnValue(body) @defer.inlineCallbacks @@ -592,7 +651,8 @@ class MatrixFederationHttpClient(object): @defer.inlineCallbacks def get_json(self, destination, path, args=None, retry_on_dns_fail=True, - timeout=None, ignore_backoff=False): + timeout=None, ignore_backoff=False, + try_trailing_slash_on_400=False): """ GETs some json from the given host homeserver and path Args: @@ -606,6 +666,9 @@ class MatrixFederationHttpClient(object): be retried. ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. + try_trailing_slash_on_400 (bool): True if on a 400 M_UNRECOGNIZED + response we should try appending a trailing slash to the end of + the request. Workaround for #3622 in Synapse <= v0.99.3. Returns: Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. @@ -631,16 +694,19 @@ class MatrixFederationHttpClient(object): query=args, ) - response = yield self._send_request( + response = yield self._send_request_with_optional_trailing_slash( request, + try_trailing_slash_on_400, + backoff_on_404=False, + ignore_backoff=ignore_backoff, retry_on_dns_fail=retry_on_dns_fail, timeout=timeout, - ignore_backoff=ignore_backoff, ) body = yield _handle_json_response( self.hs.get_reactor(), self.default_timeout, request, response, ) + defer.returnValue(body) @defer.inlineCallbacks diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index fc9a20ff59..235ce8334e 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -73,14 +73,26 @@ class ModuleApi(object): """ return self._auth_handler.check_user_exists(user_id) - def register(self, localpart): - """Registers a new user with given localpart + @defer.inlineCallbacks + def register(self, localpart, displayname=None): + """Registers a new user with given localpart and optional + displayname. + + Args: + localpart (str): The localpart of the new user. + displayname (str|None): The displayname of the new user. If None, + the user's displayname will default to `localpart`. Returns: Deferred: a 2-tuple of (user_id, access_token) """ + # Register the user reg = self.hs.get_registration_handler() - return reg.register(localpart=localpart) + user_id, access_token = yield reg.register( + localpart=localpart, default_display_name=displayname, + ) + + defer.returnValue((user_id, access_token)) @defer.inlineCallbacks def invalidate_access_token(self, access_token): diff --git a/synapse/notifier.py b/synapse/notifier.py index de02b1017e..ff589660da 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -178,8 +178,6 @@ class Notifier(object): self.remove_expired_streams, self.UNUSED_STREAM_EXPIRY_MS ) - self.replication_deferred = ObservableDeferred(defer.Deferred()) - # This is not a very cheap test to perform, but it's only executed # when rendering the metrics page, which is likely once per minute at # most when scraping it. @@ -205,7 +203,9 @@ class Notifier(object): def add_replication_callback(self, cb): """Add a callback that will be called when some new data is available. - Callback is not given any arguments. + Callback is not given any arguments. It should *not* return a Deferred - if + it needs to do any asynchronous work, a background thread should be started and + wrapped with run_as_background_process. """ self.replication_callbacks.append(cb) @@ -517,60 +517,5 @@ class Notifier(object): def notify_replication(self): """Notify the any replication listeners that there's a new event""" - with PreserveLoggingContext(): - deferred = self.replication_deferred - self.replication_deferred = ObservableDeferred(defer.Deferred()) - deferred.callback(None) - - # the callbacks may well outlast the current request, so we run - # them in the sentinel logcontext. - # - # (ideally it would be up to the callbacks to know if they were - # starting off background processes and drop the logcontext - # accordingly, but that requires more changes) - for cb in self.replication_callbacks: - cb() - - @defer.inlineCallbacks - def wait_for_replication(self, callback, timeout): - """Wait for an event to happen. - - Args: - callback: Gets called whenever an event happens. If this returns a - truthy value then ``wait_for_replication`` returns, otherwise - it waits for another event. - timeout: How many milliseconds to wait for callback return a truthy - value. - - Returns: - A deferred that resolves with the value returned by the callback. - """ - listener = _NotificationListener(None) - - end_time = self.clock.time_msec() + timeout - - while True: - listener.deferred = self.replication_deferred.observe() - result = yield callback() - if result: - break - - now = self.clock.time_msec() - if end_time <= now: - break - - listener.deferred = timeout_deferred( - listener.deferred, - timeout=(end_time - now) / 1000., - reactor=self.hs.get_reactor(), - ) - - try: - with PreserveLoggingContext(): - yield listener.deferred - except defer.TimeoutError: - break - except defer.CancelledError: - break - - defer.returnValue(result) + for cb in self.replication_callbacks: + cb() diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 1d27c9221f..912a5ac341 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -33,11 +33,12 @@ class ReplicationRegisterServlet(ReplicationEndpoint): def __init__(self, hs): super(ReplicationRegisterServlet, self).__init__(hs) self.store = hs.get_datastore() + self.registration_handler = hs.get_registration_handler() @staticmethod def _serialize_payload( user_id, token, password_hash, was_guest, make_guest, appservice_id, - create_profile_with_displayname, admin, user_type, + create_profile_with_displayname, admin, user_type, address, ): """ Args: @@ -56,6 +57,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. """ return { "token": token, @@ -66,13 +68,14 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "create_profile_with_displayname": create_profile_with_displayname, "admin": admin, "user_type": user_type, + "address": address, } @defer.inlineCallbacks def _handle_request(self, request, user_id): content = parse_json_object_from_request(request) - yield self.store.register( + yield self.registration_handler.register_with_store( user_id=user_id, token=content["token"], password_hash=content["password_hash"], @@ -82,6 +85,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): create_profile_with_displayname=content["create_profile_with_displayname"], admin=content["admin"], user_type=content["user_type"], + address=content["address"] ) defer.returnValue((200, {})) diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index 60641f1a49..5b8521c770 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -43,6 +43,8 @@ class SlavedClientIpStore(BaseSlavedStore): if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: return + self.client_ip_last_seen.prefill(key, now) + self.hs.get_tcp_replication().send_user_ip( user_id, access_token, ip, user_agent, device_id, now ) diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py index 4f19fd35aa..4d59778863 100644 --- a/synapse/replication/slave/storage/deviceinbox.py +++ b/synapse/replication/slave/storage/deviceinbox.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.storage import DataStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.deviceinbox import DeviceInboxWorkerStore from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedDeviceInboxStore(BaseSlavedStore): +class SlavedDeviceInboxStore(DeviceInboxWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceInboxStore, self).__init__(db_conn, hs) self._device_inbox_id_gen = SlavedIdTracker( @@ -43,12 +42,6 @@ class SlavedDeviceInboxStore(BaseSlavedStore): expiry_ms=30 * 60 * 1000, ) - get_to_device_stream_token = __func__(DataStore.get_to_device_stream_token) - get_new_messages_for_device = __func__(DataStore.get_new_messages_for_device) - get_new_device_msgs_for_remote = __func__(DataStore.get_new_device_msgs_for_remote) - delete_messages_for_device = __func__(DataStore.delete_messages_for_device) - delete_device_msgs_for_remote = __func__(DataStore.delete_device_msgs_for_remote) - def stream_positions(self): result = super(SlavedDeviceInboxStore, self).stream_positions() result["to_device"] = self._device_inbox_id_gen.get_current_token() diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index ec2fd561cc..16c9a162c5 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.storage import DataStore -from synapse.storage.end_to_end_keys import EndToEndKeyStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.devices import DeviceWorkerStore +from synapse.storage.end_to_end_keys import EndToEndKeyWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedDeviceStore(BaseSlavedStore): +class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceStore, self).__init__(db_conn, hs) @@ -38,17 +37,6 @@ class SlavedDeviceStore(BaseSlavedStore): "DeviceListFederationStreamChangeCache", device_list_max, ) - get_device_stream_token = __func__(DataStore.get_device_stream_token) - get_user_whose_devices_changed = __func__(DataStore.get_user_whose_devices_changed) - get_devices_by_remote = __func__(DataStore.get_devices_by_remote) - _get_devices_by_remote_txn = __func__(DataStore._get_devices_by_remote_txn) - _get_e2e_device_keys_txn = __func__(DataStore._get_e2e_device_keys_txn) - mark_as_sent_devices_by_remote = __func__(DataStore.mark_as_sent_devices_by_remote) - _mark_as_sent_devices_by_remote_txn = ( - __func__(DataStore._mark_as_sent_devices_by_remote_txn) - ) - count_e2e_one_time_keys = EndToEndKeyStore.__dict__["count_e2e_one_time_keys"] - def stream_positions(self): result = super(SlavedDeviceStore, self).stream_positions() result["device_lists"] = self._device_list_id_gen.get_current_token() @@ -58,14 +46,23 @@ class SlavedDeviceStore(BaseSlavedStore): if stream_name == "device_lists": self._device_list_id_gen.advance(token) for row in rows: - self._device_list_stream_cache.entity_has_changed( - row.user_id, token + self._invalidate_caches_for_devices( + token, row.user_id, row.destination, ) - - if row.destination: - self._device_list_federation_stream_cache.entity_has_changed( - row.destination, token - ) return super(SlavedDeviceStore, self).process_replication_rows( stream_name, token, rows ) + + def _invalidate_caches_for_devices(self, token, user_id, destination): + self._device_list_stream_cache.entity_has_changed( + user_id, token + ) + + if destination: + self._device_list_federation_stream_cache.entity_has_changed( + destination, token + ) + + self._get_cached_devices_for_user.invalidate((user_id,)) + self._get_cached_user_device.invalidate_many((user_id,)) + self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py index 92447b00d4..9e530defe0 100644 --- a/synapse/replication/slave/storage/presence.py +++ b/synapse/replication/slave/storage/presence.py @@ -54,8 +54,11 @@ class SlavedPresenceStore(BaseSlavedStore): def stream_positions(self): result = super(SlavedPresenceStore, self).stream_positions() - position = self._presence_id_gen.get_current_token() - result["presence"] = position + + if self.hs.config.use_presence: + position = self._presence_id_gen.get_current_token() + result["presence"] = position + return result def process_replication_rows(self, stream_name, token, rows): diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index f0200c1e98..45fc913c52 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -20,7 +20,7 @@ from ._slaved_id_tracker import SlavedIdTracker from .events import SlavedEventStore -class SlavedPushRuleStore(PushRulesWorkerStore, SlavedEventStore): +class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): def __init__(self, db_conn, hs): self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id", diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 586dddb40b..e558f90e1a 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -39,7 +39,7 @@ class ReplicationClientFactory(ReconnectingClientFactory): Accepts a handler that will be called when new data is available or data is required. """ - maxDelay = 5 # Try at least once every N seconds + maxDelay = 30 # Try at least once every N seconds def __init__(self, hs, client_name, handler): self.client_name = client_name @@ -54,7 +54,6 @@ class ReplicationClientFactory(ReconnectingClientFactory): def buildProtocol(self, addr): logger.info("Connected to replication: %r", addr) - self.resetDelay() return ClientReplicationStreamProtocol( self.client_name, self.server_name, self._clock, self.handler ) @@ -90,15 +89,18 @@ class ReplicationClientHandler(object): # Used for tests. self.awaiting_syncs = {} + # The factory used to create connections. + self.factory = None + def start_replication(self, hs): """Helper method to start a replication connection to the remote server using TCP. """ client_name = hs.config.worker_name - factory = ReplicationClientFactory(hs, client_name, self) + self.factory = ReplicationClientFactory(hs, client_name, self) host = hs.config.worker_replication_host port = hs.config.worker_replication_port - hs.get_reactor().connectTCP(host, port, factory) + hs.get_reactor().connectTCP(host, port, self.factory) def on_rdata(self, stream_name, token, rows): """Called when we get new replication data. By default this just pokes @@ -140,6 +142,7 @@ class ReplicationClientHandler(object): args["account_data"] = user_account_data elif room_account_data: args["account_data"] = room_account_data + return args def get_currently_syncing_users(self): @@ -204,3 +207,14 @@ class ReplicationClientHandler(object): for cmd in self.pending_commands: connection.send_command(cmd) self.pending_commands = [] + + def finished_connecting(self): + """Called when we have successfully subscribed and caught up to all + streams we're interested in. + """ + logger.info("Finished connecting to server") + + # We don't reset the delay any earlier as otherwise if there is a + # problem during start up we'll end up tight looping connecting to the + # server. + self.factory.resetDelay() diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 327556f6a1..2098c32a77 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -127,8 +127,11 @@ class RdataCommand(Command): class PositionCommand(Command): - """Sent by the client to tell the client the stream postition without + """Sent by the server to tell the client the stream postition without needing to send an RDATA. + + Sent to the client after all missing updates for a stream have been sent + to the client and they're now up to date. """ NAME = "POSITION" diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 429471c345..02e5bf6cc8 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -223,14 +223,25 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): return # Now lets try and call on_<CMD_NAME> function - try: - run_as_background_process( - "replication-" + cmd.get_logcontext_id(), - getattr(self, "on_%s" % (cmd_name,)), - cmd, - ) - except Exception: - logger.exception("[%s] Failed to handle line: %r", self.id(), line) + run_as_background_process( + "replication-" + cmd.get_logcontext_id(), + self.handle_command, + cmd, + ) + + def handle_command(self, cmd): + """Handle a command we have received over the replication stream. + + By default delegates to on_<COMMAND> + + Args: + cmd (synapse.replication.tcp.commands.Command): received command + + Returns: + Deferred + """ + handler = getattr(self, "on_%s" % (cmd.NAME,)) + return handler(cmd) def close(self): logger.warn("[%s] Closing connection", self.id()) @@ -364,8 +375,11 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): self.transport.unregisterProducer() def __str__(self): + addr = None + if self.transport: + addr = str(self.transport.getPeer()) return "ReplicationConnection<name=%s,conn_id=%s,addr=%s>" % ( - self.name, self.conn_id, self.addr, + self.name, self.conn_id, addr, ) def id(self): @@ -381,12 +395,11 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): VALID_INBOUND_COMMANDS = VALID_CLIENT_COMMANDS VALID_OUTBOUND_COMMANDS = VALID_SERVER_COMMANDS - def __init__(self, server_name, clock, streamer, addr): + def __init__(self, server_name, clock, streamer): BaseReplicationStreamProtocol.__init__(self, clock) # Old style class self.server_name = server_name self.streamer = streamer - self.addr = addr # The streams the client has subscribed to and is up to date with self.replication_streams = set() @@ -451,7 +464,7 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): @defer.inlineCallbacks def subscribe_to_stream(self, stream_name, token): - """Subscribe the remote to a streams. + """Subscribe the remote to a stream. This invloves checking if they've missed anything and sending those updates down if they have. During that time new updates for the stream @@ -478,11 +491,36 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # Now we can send any updates that came in while we were subscribing pending_rdata = self.pending_rdata.pop(stream_name, []) + updates = [] for token, update in pending_rdata: - # Only send updates newer than the current token - if token > current_token: + # If the token is null, it is part of a batch update. Batches + # are multiple updates that share a single token. To denote + # this, the token is set to None for all tokens in the batch + # except for the last. If we find a None token, we keep looking + # through tokens until we find one that is not None and then + # process all previous updates in the batch as if they had the + # final token. + if token is None: + # Store this update as part of a batch + updates.append(update) + continue + + if token <= current_token: + # This update or batch of updates is older than + # current_token, dismiss it + updates = [] + continue + + updates.append(update) + + # Send all updates that are part of this batch with the + # found token + for update in updates: self.send_command(RdataCommand(stream_name, token, update)) + # Clear stored updates + updates = [] + # They're now fully subscribed self.replication_streams.add(stream_name) except Exception as e: @@ -526,6 +564,11 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.server_name = server_name self.handler = handler + # Set of stream names that have been subscribe to, but haven't yet + # caught up with. This is used to track when the client has been fully + # connected to the remote. + self.streams_connecting = set() + # Map of stream to batched updates. See RdataCommand for info on how # batching works. self.pending_batches = {} @@ -548,6 +591,10 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # We've now finished connecting to so inform the client handler self.handler.update_connection(self) + # This will happen if we don't actually subscribe to any streams + if not self.streams_connecting: + self.handler.finished_connecting() + def on_SERVER(self, cmd): if cmd.data != self.server_name: logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data) @@ -577,6 +624,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): return self.handler.on_rdata(stream_name, cmd.token, rows) def on_POSITION(self, cmd): + # When we get a `POSITION` command it means we've finished getting + # missing updates for the given stream, and are now up to date. + self.streams_connecting.discard(cmd.stream_name) + if not self.streams_connecting: + self.handler.finished_connecting() + return self.handler.on_position(cmd.stream_name, cmd.token) def on_SYNC(self, cmd): @@ -593,6 +646,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.id(), stream_name, token ) + self.streams_connecting.add(stream_name) + self.send_command(ReplicateCommand(stream_name, token)) def on_connection_closed(self): diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index fd59f1595f..7fc346c7b6 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -16,6 +16,7 @@ """ import logging +import random from six import itervalues @@ -56,7 +57,6 @@ class ReplicationStreamProtocolFactory(Factory): self.server_name, self.clock, self.streamer, - addr ) @@ -74,6 +74,8 @@ class ReplicationStreamer(object): self.notifier = hs.get_notifier() self._server_notices_sender = hs.get_server_notices_sender() + self._replication_torture_level = hs.config.replication_torture_level + # Current connections. self.connections = [] @@ -157,10 +159,23 @@ class ReplicationStreamer(object): for stream in self.streams: stream.advance_current_token() - for stream in self.streams: + all_streams = self.streams + + if self._replication_torture_level is not None: + # there is no guarantee about ordering between the streams, + # so let's shuffle them around a bit when we are in torture mode. + all_streams = list(all_streams) + random.shuffle(all_streams) + + for stream in all_streams: if stream.last_token == stream.upto_token: continue + if self._replication_torture_level: + yield self.clock.sleep( + self._replication_torture_level / 1000.0 + ) + logger.debug( "Getting stream: %s: %s -> %s", stream.NAME, stream.last_token, stream.upto_token diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py index c1e626be3f..e23084baae 100644 --- a/synapse/replication/tcp/streams.py +++ b/synapse/replication/tcp/streams.py @@ -23,7 +23,7 @@ Each stream is defined by the following information: current_token: The function that returns the current token for the stream update_function: The function that returns a list of updates between two tokens """ - +import itertools import logging from collections import namedtuple @@ -195,8 +195,8 @@ class Stream(object): limit=MAX_EVENTS_BEHIND + 1, ) - if len(rows) >= MAX_EVENTS_BEHIND: - raise Exception("stream %s has fallen behind" % (self.NAME)) + # never turn more than MAX_EVENTS_BEHIND + 1 into updates. + rows = itertools.islice(rows, MAX_EVENTS_BEHIND + 1) else: rows = yield self.update_function( from_token, current_token, @@ -204,6 +204,11 @@ class Stream(object): updates = [(row[0], self.ROW_TYPE(*row[1:])) for row in rows] + # check we didn't get more rows than the limit. + # doing it like this allows the update_function to be a generator. + if self._LIMITED and len(updates) >= MAX_EVENTS_BEHIND: + raise Exception("stream %s has fallen behind" % (self.NAME)) + defer.returnValue((updates, current_token)) def current_token(self): diff --git a/synapse/res/templates/notif.html b/synapse/res/templates/notif.html index 88b921ca9c..1a6c70b562 100644 --- a/synapse/res/templates/notif.html +++ b/synapse/res/templates/notif.html @@ -6,11 +6,11 @@ <img alt="" class="sender_avatar" src="{{ message.sender_avatar_url|mxc_to_http(32,32) }}" /> {% else %} {% if message.sender_hash % 3 == 0 %} - <img class="sender_avatar" src="https://vector.im/beta/img/76cfa6.png" /> + <img class="sender_avatar" src="https://riot.im/img/external/avatar-1.png" /> {% elif message.sender_hash % 3 == 1 %} - <img class="sender_avatar" src="https://vector.im/beta/img/50e2c2.png" /> + <img class="sender_avatar" src="https://riot.im/img/external/avatar-2.png" /> {% else %} - <img class="sender_avatar" src="https://vector.im/beta/img/f4c371.png" /> + <img class="sender_avatar" src="https://riot.im/img/external/avatar-3.png" /> {% endif %} {% endif %} {% endif %} diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html index fcdb3109fe..019506e5fb 100644 --- a/synapse/res/templates/notif_mail.html +++ b/synapse/res/templates/notif_mail.html @@ -19,7 +19,7 @@ </td> <td class="logo"> {% if app_name == "Riot" %} - <img src="http://matrix.org/img/riot-logo-email.png" width="83" height="83" alt="[Riot]"/> + <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/> {% elif app_name == "Vector" %} <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/> {% else %} diff --git a/synapse/res/templates/room.html b/synapse/res/templates/room.html index 723c222d25..b8525fef88 100644 --- a/synapse/res/templates/room.html +++ b/synapse/res/templates/room.html @@ -5,11 +5,11 @@ <img alt="" src="{{ room.avatar_url|mxc_to_http(48,48) }}" /> {% else %} {% if room.hash % 3 == 0 %} - <img alt="" src="https://vector.im/beta/img/76cfa6.png" /> + <img alt="" src="https://riot.im/img/external/avatar-1.png" /> {% elif room.hash % 3 == 1 %} - <img alt="" src="https://vector.im/beta/img/50e2c2.png" /> + <img alt="" src="https://riot.im/img/external/avatar-2.png" /> {% else %} - <img alt="" src="https://vector.im/beta/img/f4c371.png" /> + <img alt="" src="https://riot.im/img/external/avatar-3.png" /> {% endif %} {% endif %} </td> diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 82433a2aa9..e788769639 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,12 +17,14 @@ import hashlib import hmac import logging +import platform from six import text_type from six.moves import http_client from twisted.internet import defer +import synapse from synapse.api.constants import Membership, UserTypes from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( @@ -32,6 +34,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.types import UserID, create_requester +from synapse.util.versionstring import get_version_string from .base import ClientV1RestServlet, client_path_patterns @@ -66,6 +69,25 @@ class UsersRestServlet(ClientV1RestServlet): defer.returnValue((200, ret)) +class VersionServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns("/admin/server_version") + + @defer.inlineCallbacks + def on_GET(self, request): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + ret = { + 'server_version': get_version_string(synapse), + 'python_version': platform.python_version(), + } + + defer.returnValue((200, ret)) + + class UserRegisterServlet(ClientV1RestServlet): """ Attributes: @@ -466,53 +488,67 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ) new_room_id = info["room_id"] - yield self.event_creation_handler.create_and_send_nonmember_event( - room_creator_requester, - { - "type": "m.room.message", - "content": {"body": message, "msgtype": "m.text"}, - "room_id": new_room_id, - "sender": new_room_user_id, - }, - ratelimit=False, - ) - requester_user_id = requester.user.to_string() - logger.info("Shutting down room %r", room_id) + logger.info( + "Shutting down room %r, joining to new room: %r", + room_id, new_room_id, + ) + # This will work even if the room is already blocked, but that is + # desirable in case the first attempt at blocking the room failed below. yield self.store.block_room(room_id, requester_user_id) users = yield self.state.get_current_user_in_room(room_id) kicked_users = [] + failed_to_kick_users = [] for user_id in users: if not self.hs.is_mine_id(user_id): continue logger.info("Kicking %r from %r...", user_id, room_id) - target_requester = create_requester(user_id) - yield self.room_member_handler.update_membership( - requester=target_requester, - target=target_requester.user, - room_id=room_id, - action=Membership.LEAVE, - content={}, - ratelimit=False - ) + try: + target_requester = create_requester(user_id) + yield self.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=room_id, + action=Membership.LEAVE, + content={}, + ratelimit=False, + require_consent=False, + ) - yield self.room_member_handler.forget(target_requester.user, room_id) + yield self.room_member_handler.forget(target_requester.user, room_id) - yield self.room_member_handler.update_membership( - requester=target_requester, - target=target_requester.user, - room_id=new_room_id, - action=Membership.JOIN, - content={}, - ratelimit=False - ) + yield self.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=new_room_id, + action=Membership.JOIN, + content={}, + ratelimit=False, + require_consent=False, + ) + + kicked_users.append(user_id) + except Exception: + logger.exception( + "Failed to leave old room and join new room for %r", user_id, + ) + failed_to_kick_users.append(user_id) - kicked_users.append(user_id) + yield self.event_creation_handler.create_and_send_nonmember_event( + room_creator_requester, + { + "type": "m.room.message", + "content": {"body": message, "msgtype": "m.text"}, + "room_id": new_room_id, + "sender": new_room_user_id, + }, + ratelimit=False, + ) aliases_for_room = yield self.store.get_aliases_for_room(room_id) @@ -522,6 +558,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): defer.returnValue((200, { "kicked_users": kicked_users, + "failed_to_kick_users": failed_to_kick_users, "local_aliases": aliases_for_room, "new_room_id": new_room_id, })) @@ -763,3 +800,4 @@ def register_servlets(hs, http_server): QuarantineMediaInRoom(hs).register(http_server) ListMediaInRoom(hs).register(http_server) UserRegisterServlet(hs).register(http_server) + VersionServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 6121c5b6df..5180e9eaf1 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -22,6 +22,7 @@ from twisted.internet import defer from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.api.ratelimiting import Ratelimiter from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -97,6 +98,7 @@ class LoginRestServlet(ClientV1RestServlet): self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() self._well_known_builder = WellKnownBuilder(hs) + self._address_ratelimiter = Ratelimiter() def on_GET(self, request): flows = [] @@ -129,6 +131,13 @@ class LoginRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): + self._address_ratelimiter.ratelimit( + request.getClientIP(), time_now_s=self.hs.clock.time(), + rate_hz=self.hs.config.rc_login_address.per_second, + burst_count=self.hs.config.rc_login_address.burst_count, + update=True, + ) + login_submission = parse_json_object_from_request(request) try: if self.jwt_enabled and (login_submission["type"] == @@ -192,6 +201,24 @@ class LoginRestServlet(ClientV1RestServlet): # We store all email addreses as lowercase in the DB. # (See add_threepid in synapse/handlers/auth.py) address = address.lower() + + # Check for login providers that support 3pid login types + canonical_user_id, callback_3pid = ( + yield self.auth_handler.check_password_provider_3pid( + medium, + address, + login_submission["password"], + ) + ) + if canonical_user_id: + # Authentication through password provider and 3pid succeeded + result = yield self._register_device_with_callback( + canonical_user_id, login_submission, callback_3pid, + ) + defer.returnValue(result) + + # No password providers were able to handle this 3pid + # Check local store user_id = yield self.hs.get_datastore().get_user_id_by_threepid( medium, address, ) @@ -214,20 +241,43 @@ class LoginRestServlet(ClientV1RestServlet): if "user" not in identifier: raise SynapseError(400, "User identifier is missing 'user' key") - auth_handler = self.auth_handler - canonical_user_id, callback = yield auth_handler.validate_login( + canonical_user_id, callback = yield self.auth_handler.validate_login( identifier["user"], login_submission, ) + result = yield self._register_device_with_callback( + canonical_user_id, login_submission, callback, + ) + defer.returnValue(result) + + @defer.inlineCallbacks + def _register_device_with_callback( + self, + user_id, + login_submission, + callback=None, + ): + """ Registers a device with a given user_id. Optionally run a callback + function after registration has completed. + + Args: + user_id (str): ID of the user to register. + login_submission (dict): Dictionary of login information. + callback (func|None): Callback function to run after registration. + + Returns: + result (Dict[str,str]): Dictionary of account information after + successful registration. + """ device_id = login_submission.get("device_id") initial_display_name = login_submission.get("initial_device_display_name") device_id, access_token = yield self.registration_handler.register_device( - canonical_user_id, device_id, initial_display_name, + user_id, device_id, initial_display_name, ) result = { - "user_id": canonical_user_id, + "user_id": user_id, "access_token": access_token, "home_server": self.hs.hostname, "device_id": device_id, @@ -285,6 +335,7 @@ class LoginRestServlet(ClientV1RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() + auth_handler = self.auth_handler registered_user_id = yield auth_handler.check_user_exists(user_id) if registered_user_id: diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 94cbba4303..6d235262c8 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -25,7 +25,12 @@ from twisted.internet import defer import synapse import synapse.types from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError, UnrecognizedRequestError +from synapse.api.errors import ( + Codes, + LimitExceededError, + SynapseError, + UnrecognizedRequestError, +) from synapse.config.server import is_threepid_reserved from synapse.http.servlet import ( RestServlet, @@ -191,18 +196,36 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() + self.ratelimiter = hs.get_registration_ratelimiter() + self.clock = hs.get_clock() @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): body = parse_json_object_from_request(request) + client_addr = request.getClientIP() + + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + client_addr, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, + update=False, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + kind = b"user" if b"kind" in request.args: kind = request.args[b"kind"][0] if kind == b"guest": - ret = yield self._do_guest_registration(body) + ret = yield self._do_guest_registration(body, address=client_addr) defer.returnValue(ret) return elif kind != b"user": @@ -411,6 +434,7 @@ class RegisterRestServlet(RestServlet): guest_access_token=guest_access_token, generate_token=False, threepid=threepid, + address=client_addr, ) # Necessary due to auth checks prior to the threepid being # written to the db @@ -522,12 +546,13 @@ class RegisterRestServlet(RestServlet): defer.returnValue(result) @defer.inlineCallbacks - def _do_guest_registration(self, params): + def _do_guest_registration(self, params, address=None): if not self.hs.config.allow_guest_access: raise SynapseError(403, "Guest access is disabled") user_id, _ = yield self.registration_handler.register( generate_token=False, - make_guest=True + make_guest=True, + address=address, ) # we don't allow guests to specify their own device_id, because diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/v2_alpha/user_directory.py index cac0624ba7..36b02de37f 100644 --- a/synapse/rest/client/v2_alpha/user_directory.py +++ b/synapse/rest/client/v2_alpha/user_directory.py @@ -59,6 +59,12 @@ class UserDirectorySearchRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() + if not self.hs.config.user_directory_search_enabled: + defer.returnValue((200, { + "limited": False, + "results": [], + })) + body = parse_json_object_from_request(request) limit = body.get("limit", 10) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index d16a30acd8..953d89bd82 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -99,10 +100,29 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - if is_ascii(upload_name): - disposition = "inline; filename=%s" % (_quote(upload_name),) + # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. + # + # `filename` is defined to be a `value`, which is defined by RFC2616 + # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` + # is (essentially) a single US-ASCII word, and a `quoted-string` is a + # US-ASCII string surrounded by double-quotes, using backslash as an + # escape charater. Note that %-encoding is *not* permitted. + # + # `filename*` is defined to be an `ext-value`, which is defined in + # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, + # where `value-chars` is essentially a %-encoded string in the given charset. + # + # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 + # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 + # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 + + # We avoid the quoted-string version of `filename`, because (a) synapse didn't + # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we + # may as well just do the filename* version. + if _can_encode_filename_as_token(upload_name): + disposition = 'inline; filename=%s' % (upload_name, ) else: - disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name), ) request.setHeader(b"Content-Disposition", disposition.encode('ascii')) @@ -115,6 +135,35 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Length", b"%d" % (file_size,)) +# separators as defined in RFC2616. SP and HT are handled separately. +# see _can_encode_filename_as_token. +_FILENAME_SEPARATOR_CHARS = set(( + "(", ")", "<", ">", "@", ",", ";", ":", "\\", '"', + "/", "[", "]", "?", "=", "{", "}", +)) + + +def _can_encode_filename_as_token(x): + for c in x: + # from RFC2616: + # + # token = 1*<any CHAR except CTLs or separators> + # + # separators = "(" | ")" | "<" | ">" | "@" + # | "," | ";" | ":" | "\" | <"> + # | "/" | "[" | "]" | "?" | "=" + # | "{" | "}" | SP | HT + # + # CHAR = <any US-ASCII character (octets 0 - 127)> + # + # CTL = <any US-ASCII control character + # (octets 0 - 31) and DEL (127)> + # + if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: + return False + return True + + @defer.inlineCallbacks def respond_with_responder(request, responder, media_type, file_size, upload_name=None): """Responds to the request with given responder. If responder is None then @@ -213,8 +262,7 @@ def get_filename_from_headers(headers): Content-Disposition HTTP header. Args: - headers (twisted.web.http_headers.Headers): The HTTP - request headers. + headers (dict[bytes, list[bytes]]): The HTTP request headers. Returns: A Unicode string of the filename, or None. @@ -225,23 +273,12 @@ def get_filename_from_headers(headers): if not content_disposition[0]: return - # dict of unicode: bytes, corresponding to the key value sections of the - # Content-Disposition header. - params = {} - parts = content_disposition[0].split(b";") - for i in parts: - # Split into key-value pairs, if able - # We don't care about things like `inline`, so throw it out - if b"=" not in i: - continue - - key, value = i.strip().split(b"=") - params[key.decode('ascii')] = value + _, params = _parse_header(content_disposition[0]) upload_name = None # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) + upload_name_utf8 = params.get(b"filename*", None) if upload_name_utf8: if upload_name_utf8.lower().startswith(b"utf-8''"): upload_name_utf8 = upload_name_utf8[7:] @@ -267,12 +304,68 @@ def get_filename_from_headers(headers): # If there isn't check for an ascii name. if not upload_name: - upload_name_ascii = params.get("filename", None) + upload_name_ascii = params.get(b"filename", None) if upload_name_ascii and is_ascii(upload_name_ascii): - # Make sure there's no %-quoted bytes. If there is, reject it as - # non-valid ASCII. - if b"%" not in upload_name_ascii: - upload_name = upload_name_ascii.decode('ascii') + upload_name = upload_name_ascii.decode('ascii') # This may be None here, indicating we did not find a matching name. return upload_name + + +def _parse_header(line): + """Parse a Content-type like header. + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + line (bytes): header to be parsed + + Returns: + Tuple[bytes, dict[bytes, bytes]]: + the main content-type, followed by the parameter dictionary + """ + parts = _parseparam(b';' + line) + key = next(parts) + pdict = {} + for p in parts: + i = p.find(b'=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1:].strip() + + # strip double-quotes + if len(value) >= 2 and value[0:1] == value[-1:] == b'"': + value = value[1:-1] + value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"') + pdict[name] = value + + return key, pdict + + +def _parseparam(s): + """Generator which splits the input on ;, respecting double-quoted sequences + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + s (bytes): header to be parsed + + Returns: + Iterable[bytes]: the split input + """ + while s[:1] == b';': + s = s[1:] + + # look for the next ; + end = s.find(b';') + + # if there is an odd number of " marks between here and the next ;, skip to the + # next ; instead + while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: + end = s.find(b';', end + 1) + + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] diff --git a/synapse/server.py b/synapse/server.py index 4d364fccce..dc8f1ccb8c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -42,7 +42,7 @@ from synapse.federation.federation_server import ( ReplicationFederationHandlerRegistry, ) from synapse.federation.send_queue import FederationRemoteSendQueue -from synapse.federation.transaction_queue import TransactionQueue +from synapse.federation.sender import FederationSender from synapse.federation.transport.client import TransportLayerClient from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer from synapse.groups.groups_server import GroupsServerHandler @@ -51,7 +51,7 @@ from synapse.handlers.acme import AcmeHandler from synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator from synapse.handlers.deactivate_account import DeactivateAccountHandler -from synapse.handlers.device import DeviceHandler +from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler from synapse.handlers.devicemessage import DeviceMessageHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler @@ -185,6 +185,10 @@ class HomeServer(object): 'registration_handler', ] + REQUIRED_ON_MASTER_STARTUP = [ + "user_directory_handler", + ] + # This is overridden in derived application classes # (such as synapse.app.homeserver.SynapseHomeServer) and gives the class to be # instantiated during setup() for future return by get_datastore() @@ -206,6 +210,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -220,6 +225,15 @@ class HomeServer(object): conn.commit() logger.info("Finished setting up.") + def setup_master(self): + """ + Some handlers have side effects on instantiation (like registering + background updates). This function causes them to be fetched, and + therefore instantiated, to run those side effects. + """ + for i in self.REQUIRED_ON_MASTER_STARTUP: + getattr(self, "get_" + i)() + def get_reactor(self): """ Fetch the Twisted reactor in use by this HomeServer. @@ -251,6 +265,9 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter + def get_registration_ratelimiter(self): + return self.registration_ratelimiter + def build_federation_client(self): return FederationClient(self) @@ -307,7 +324,10 @@ class HomeServer(object): return MacaroonGenerator(self) def build_device_handler(self): - return DeviceHandler(self) + if self.config.worker_app: + return DeviceWorkerHandler(self) + else: + return DeviceHandler(self) def build_device_message_handler(self): return DeviceMessageHandler(self) @@ -414,7 +434,7 @@ class HomeServer(object): def build_federation_sender(self): if self.should_send_federation(): - return TransactionQueue(self) + return FederationSender(self) elif not self.config.worker_app: return FederationRemoteSendQueue(self) else: diff --git a/synapse/server.pyi b/synapse/server.pyi index 06cd083a74..3ba3a967c2 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -1,5 +1,6 @@ import synapse.api.auth import synapse.config.homeserver +import synapse.federation.sender import synapse.federation.transaction_queue import synapse.federation.transport.client import synapse.handlers @@ -7,9 +8,9 @@ import synapse.handlers.auth import synapse.handlers.deactivate_account import synapse.handlers.device import synapse.handlers.e2e_keys +import synapse.handlers.message import synapse.handlers.room import synapse.handlers.room_member -import synapse.handlers.message import synapse.handlers.set_password import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager @@ -62,7 +63,7 @@ class HomeServer(object): def get_set_password_handler(self) -> synapse.handlers.set_password.SetPasswordHandler: pass - def get_federation_sender(self) -> synapse.federation.transaction_queue.TransactionQueue: + def get_federation_sender(self) -> synapse.federation.sender.FederationSender: pass def get_federation_transport_client(self) -> synapse.federation.transport.client.TransportLayerClient: diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a0333d5309..7e3903859b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -767,18 +767,25 @@ class SQLBaseStore(object): """ allvalues = {} allvalues.update(keyvalues) - allvalues.update(values) allvalues.update(insertion_values) + if not values: + latter = "NOTHING" + else: + allvalues.update(values) + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) + ) + sql = ( "INSERT INTO %s (%s) VALUES (%s) " - "ON CONFLICT (%s) DO UPDATE SET %s" + "ON CONFLICT (%s) DO %s" ) % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), - ", ".join(k + "=EXCLUDED." + k for k in values), + latter ) txn.execute(sql, list(allvalues.values())) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 60cdc884e6..a2f8c23a65 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -52,7 +52,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.avg_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: # Use the exponential moving average so that we can adapt to @@ -64,7 +66,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.total_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: return float(self.total_item_count) / float(self.total_duration_ms) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index e06b0bc56d..e6a42a53bb 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -19,14 +19,174 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import SQLBaseStore +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.expiringcache import ExpiringCache -from .background_updates import BackgroundUpdateStore - logger = logging.getLogger(__name__) -class DeviceInboxStore(BackgroundUpdateStore): +class DeviceInboxWorkerStore(SQLBaseStore): + def get_to_device_stream_token(self): + return self._device_inbox_id_gen.get_current_token() + + def get_new_messages_for_device( + self, user_id, device_id, last_stream_id, current_stream_id, limit=100 + ): + """ + Args: + user_id(str): The recipient user_id. + device_id(str): The recipient device_id. + current_stream_id(int): The current position of the to device + message stream. + Returns: + Deferred ([dict], int): List of messages for the device and where + in the stream the messages got to. + """ + has_changed = self._device_inbox_stream_cache.has_entity_changed( + user_id, last_stream_id + ) + if not has_changed: + return defer.succeed(([], current_stream_id)) + + def get_new_messages_for_device_txn(txn): + sql = ( + "SELECT stream_id, message_json FROM device_inbox" + " WHERE user_id = ? AND device_id = ?" + " AND ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, ( + user_id, device_id, last_stream_id, current_stream_id, limit + )) + messages = [] + for row in txn: + stream_pos = row[0] + messages.append(json.loads(row[1])) + if len(messages) < limit: + stream_pos = current_stream_id + return (messages, stream_pos) + + return self.runInteraction( + "get_new_messages_for_device", get_new_messages_for_device_txn, + ) + + @defer.inlineCallbacks + def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): + """ + Args: + user_id(str): The recipient user_id. + device_id(str): The recipient device_id. + up_to_stream_id(int): Where to delete messages up to. + Returns: + A deferred that resolves to the number of messages deleted. + """ + # If we have cached the last stream id we've deleted up to, we can + # check if there is likely to be anything that needs deleting + last_deleted_stream_id = self._last_device_delete_cache.get( + (user_id, device_id), None + ) + if last_deleted_stream_id: + has_changed = self._device_inbox_stream_cache.has_entity_changed( + user_id, last_deleted_stream_id + ) + if not has_changed: + defer.returnValue(0) + + def delete_messages_for_device_txn(txn): + sql = ( + "DELETE FROM device_inbox" + " WHERE user_id = ? AND device_id = ?" + " AND stream_id <= ?" + ) + txn.execute(sql, (user_id, device_id, up_to_stream_id)) + return txn.rowcount + + count = yield self.runInteraction( + "delete_messages_for_device", delete_messages_for_device_txn + ) + + # Update the cache, ensuring that we only ever increase the value + last_deleted_stream_id = self._last_device_delete_cache.get( + (user_id, device_id), 0 + ) + self._last_device_delete_cache[(user_id, device_id)] = max( + last_deleted_stream_id, up_to_stream_id + ) + + defer.returnValue(count) + + def get_new_device_msgs_for_remote( + self, destination, last_stream_id, current_stream_id, limit=100 + ): + """ + Args: + destination(str): The name of the remote server. + last_stream_id(int|long): The last position of the device message stream + that the server sent up to. + current_stream_id(int|long): The current position of the device + message stream. + Returns: + Deferred ([dict], int|long): List of messages for the device and where + in the stream the messages got to. + """ + + has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( + destination, last_stream_id + ) + if not has_changed or last_stream_id == current_stream_id: + return defer.succeed(([], current_stream_id)) + + def get_new_messages_for_remote_destination_txn(txn): + sql = ( + "SELECT stream_id, messages_json FROM device_federation_outbox" + " WHERE destination = ?" + " AND ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, ( + destination, last_stream_id, current_stream_id, limit + )) + messages = [] + for row in txn: + stream_pos = row[0] + messages.append(json.loads(row[1])) + if len(messages) < limit: + stream_pos = current_stream_id + return (messages, stream_pos) + + return self.runInteraction( + "get_new_device_msgs_for_remote", + get_new_messages_for_remote_destination_txn, + ) + + def delete_device_msgs_for_remote(self, destination, up_to_stream_id): + """Used to delete messages when the remote destination acknowledges + their receipt. + + Args: + destination(str): The destination server_name + up_to_stream_id(int): Where to delete messages up to. + Returns: + A deferred that resolves when the messages have been deleted. + """ + def delete_messages_for_remote_destination_txn(txn): + sql = ( + "DELETE FROM device_federation_outbox" + " WHERE destination = ?" + " AND stream_id <= ?" + ) + txn.execute(sql, (destination, up_to_stream_id)) + + return self.runInteraction( + "delete_device_msgs_for_remote", + delete_messages_for_remote_destination_txn + ) + + +class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" def __init__(self, db_conn, hs): @@ -220,93 +380,6 @@ class DeviceInboxStore(BackgroundUpdateStore): txn.executemany(sql, rows) - def get_new_messages_for_device( - self, user_id, device_id, last_stream_id, current_stream_id, limit=100 - ): - """ - Args: - user_id(str): The recipient user_id. - device_id(str): The recipient device_id. - current_stream_id(int): The current position of the to device - message stream. - Returns: - Deferred ([dict], int): List of messages for the device and where - in the stream the messages got to. - """ - has_changed = self._device_inbox_stream_cache.has_entity_changed( - user_id, last_stream_id - ) - if not has_changed: - return defer.succeed(([], current_stream_id)) - - def get_new_messages_for_device_txn(txn): - sql = ( - "SELECT stream_id, message_json FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - " LIMIT ?" - ) - txn.execute(sql, ( - user_id, device_id, last_stream_id, current_stream_id, limit - )) - messages = [] - for row in txn: - stream_pos = row[0] - messages.append(json.loads(row[1])) - if len(messages) < limit: - stream_pos = current_stream_id - return (messages, stream_pos) - - return self.runInteraction( - "get_new_messages_for_device", get_new_messages_for_device_txn, - ) - - @defer.inlineCallbacks - def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): - """ - Args: - user_id(str): The recipient user_id. - device_id(str): The recipient device_id. - up_to_stream_id(int): Where to delete messages up to. - Returns: - A deferred that resolves to the number of messages deleted. - """ - # If we have cached the last stream id we've deleted up to, we can - # check if there is likely to be anything that needs deleting - last_deleted_stream_id = self._last_device_delete_cache.get( - (user_id, device_id), None - ) - if last_deleted_stream_id: - has_changed = self._device_inbox_stream_cache.has_entity_changed( - user_id, last_deleted_stream_id - ) - if not has_changed: - defer.returnValue(0) - - def delete_messages_for_device_txn(txn): - sql = ( - "DELETE FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND stream_id <= ?" - ) - txn.execute(sql, (user_id, device_id, up_to_stream_id)) - return txn.rowcount - - count = yield self.runInteraction( - "delete_messages_for_device", delete_messages_for_device_txn - ) - - # Update the cache, ensuring that we only ever increase the value - last_deleted_stream_id = self._last_device_delete_cache.get( - (user_id, device_id), 0 - ) - self._last_device_delete_cache[(user_id, device_id)] = max( - last_deleted_stream_id, up_to_stream_id - ) - - defer.returnValue(count) - def get_all_new_device_messages(self, last_pos, current_pos, limit): """ Args: @@ -351,77 +424,6 @@ class DeviceInboxStore(BackgroundUpdateStore): "get_all_new_device_messages", get_all_new_device_messages_txn ) - def get_to_device_stream_token(self): - return self._device_inbox_id_gen.get_current_token() - - def get_new_device_msgs_for_remote( - self, destination, last_stream_id, current_stream_id, limit=100 - ): - """ - Args: - destination(str): The name of the remote server. - last_stream_id(int|long): The last position of the device message stream - that the server sent up to. - current_stream_id(int|long): The current position of the device - message stream. - Returns: - Deferred ([dict], int|long): List of messages for the device and where - in the stream the messages got to. - """ - - has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( - destination, last_stream_id - ) - if not has_changed or last_stream_id == current_stream_id: - return defer.succeed(([], current_stream_id)) - - def get_new_messages_for_remote_destination_txn(txn): - sql = ( - "SELECT stream_id, messages_json FROM device_federation_outbox" - " WHERE destination = ?" - " AND ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - " LIMIT ?" - ) - txn.execute(sql, ( - destination, last_stream_id, current_stream_id, limit - )) - messages = [] - for row in txn: - stream_pos = row[0] - messages.append(json.loads(row[1])) - if len(messages) < limit: - stream_pos = current_stream_id - return (messages, stream_pos) - - return self.runInteraction( - "get_new_device_msgs_for_remote", - get_new_messages_for_remote_destination_txn, - ) - - def delete_device_msgs_for_remote(self, destination, up_to_stream_id): - """Used to delete messages when the remote destination acknowledges - their receipt. - - Args: - destination(str): The destination server_name - up_to_stream_id(int): Where to delete messages up to. - Returns: - A deferred that resolves when the messages have been deleted. - """ - def delete_messages_for_remote_destination_txn(txn): - sql = ( - "DELETE FROM device_federation_outbox" - " WHERE destination = ?" - " AND stream_id <= ?" - ) - txn.execute(sql, (destination, up_to_stream_id)) - - return self.runInteraction( - "delete_device_msgs_for_remote", - delete_messages_for_remote_destination_txn - ) - @defer.inlineCallbacks def _background_drop_index_device_inbox(self, progress, batch_size): def reindex_txn(conn): diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index ecdab34e7d..e716dc1437 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -22,11 +22,10 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList -from ._base import Cache, db_to_json - logger = logging.getLogger(__name__) DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( @@ -34,7 +33,343 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( ) -class DeviceStore(BackgroundUpdateStore): +class DeviceWorkerStore(SQLBaseStore): + def get_device(self, user_id, device_id): + """Retrieve a device. + + Args: + user_id (str): The ID of the user which owns the device + device_id (str): The ID of the device to retrieve + Returns: + defer.Deferred for a dict containing the device information + Raises: + StoreError: if the device is not found + """ + return self._simple_select_one( + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + retcols=("user_id", "device_id", "display_name"), + desc="get_device", + ) + + @defer.inlineCallbacks + def get_devices_by_user(self, user_id): + """Retrieve all of a user's registered devices. + + Args: + user_id (str): + Returns: + defer.Deferred: resolves to a dict from device_id to a dict + containing "device_id", "user_id" and "display_name" for each + device. + """ + devices = yield self._simple_select_list( + table="devices", + keyvalues={"user_id": user_id}, + retcols=("user_id", "device_id", "display_name"), + desc="get_devices_by_user" + ) + + defer.returnValue({d["device_id"]: d for d in devices}) + + def get_devices_by_remote(self, destination, from_stream_id): + """Get stream of updates to send to remote servers + + Returns: + (int, list[dict]): current stream id and list of updates + """ + now_stream_id = self._device_list_id_gen.get_current_token() + + has_changed = self._device_list_federation_stream_cache.has_entity_changed( + destination, int(from_stream_id) + ) + if not has_changed: + return (now_stream_id, []) + + return self.runInteraction( + "get_devices_by_remote", self._get_devices_by_remote_txn, + destination, from_stream_id, now_stream_id, + ) + + def _get_devices_by_remote_txn(self, txn, destination, from_stream_id, + now_stream_id): + sql = """ + SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes + WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? + GROUP BY user_id, device_id + LIMIT 20 + """ + txn.execute( + sql, (destination, from_stream_id, now_stream_id, False) + ) + + # maps (user_id, device_id) -> stream_id + query_map = {(r[0], r[1]): r[2] for r in txn} + if not query_map: + return (now_stream_id, []) + + if len(query_map) >= 20: + now_stream_id = max(stream_id for stream_id in itervalues(query_map)) + + devices = self._get_e2e_device_keys_txn( + txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True + ) + + prev_sent_id_sql = """ + SELECT coalesce(max(stream_id), 0) as stream_id + FROM device_lists_outbound_last_success + WHERE destination = ? AND user_id = ? AND stream_id <= ? + """ + + results = [] + for user_id, user_devices in iteritems(devices): + # The prev_id for the first row is always the last row before + # `from_stream_id` + txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) + rows = txn.fetchall() + prev_id = rows[0][0] + for device_id, device in iteritems(user_devices): + stream_id = query_map[(user_id, device_id)] + result = { + "user_id": user_id, + "device_id": device_id, + "prev_id": [prev_id] if prev_id else [], + "stream_id": stream_id, + } + + prev_id = stream_id + + if device is not None: + key_json = device.get("key_json", None) + if key_json: + result["keys"] = db_to_json(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + else: + result["deleted"] = True + + results.append(result) + + return (now_stream_id, results) + + def mark_as_sent_devices_by_remote(self, destination, stream_id): + """Mark that updates have successfully been sent to the destination. + """ + return self.runInteraction( + "mark_as_sent_devices_by_remote", self._mark_as_sent_devices_by_remote_txn, + destination, stream_id, + ) + + def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): + # We update the device_lists_outbound_last_success with the successfully + # poked users. We do the join to see which users need to be inserted and + # which updated. + sql = """ + SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) + FROM device_lists_outbound_pokes as o + LEFT JOIN device_lists_outbound_last_success as s + USING (destination, user_id) + WHERE destination = ? AND o.stream_id <= ? + GROUP BY user_id + """ + txn.execute(sql, (destination, stream_id,)) + rows = txn.fetchall() + + sql = """ + UPDATE device_lists_outbound_last_success + SET stream_id = ? + WHERE destination = ? AND user_id = ? + """ + txn.executemany( + sql, ((row[1], destination, row[0],) for row in rows if row[2]) + ) + + sql = """ + INSERT INTO device_lists_outbound_last_success + (destination, user_id, stream_id) VALUES (?, ?, ?) + """ + txn.executemany( + sql, ((destination, row[0], row[1],) for row in rows if not row[2]) + ) + + # Delete all sent outbound pokes + sql = """ + DELETE FROM device_lists_outbound_pokes + WHERE destination = ? AND stream_id <= ? + """ + txn.execute(sql, (destination, stream_id,)) + + def get_device_stream_token(self): + return self._device_list_id_gen.get_current_token() + + @defer.inlineCallbacks + def get_user_devices_from_cache(self, query_list): + """Get the devices (and keys if any) for remote users from the cache. + + Args: + query_list(list): List of (user_id, device_ids), if device_ids is + falsey then return all device ids for that user. + + Returns: + (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is + a set of user_ids and results_map is a mapping of + user_id -> device_id -> device_info + """ + user_ids = set(user_id for user_id, _ in query_list) + user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids)) + user_ids_in_cache = set( + user_id for user_id, stream_id in user_map.items() if stream_id + ) + user_ids_not_in_cache = user_ids - user_ids_in_cache + + results = {} + for user_id, device_id in query_list: + if user_id not in user_ids_in_cache: + continue + + if device_id: + device = yield self._get_cached_user_device(user_id, device_id) + results.setdefault(user_id, {})[device_id] = device + else: + results[user_id] = yield self._get_cached_devices_for_user(user_id) + + defer.returnValue((user_ids_not_in_cache, results)) + + @cachedInlineCallbacks(num_args=2, tree=True) + def _get_cached_user_device(self, user_id, device_id): + content = yield self._simple_select_one_onecol( + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + retcol="content", + desc="_get_cached_user_device", + ) + defer.returnValue(db_to_json(content)) + + @cachedInlineCallbacks() + def _get_cached_devices_for_user(self, user_id): + devices = yield self._simple_select_list( + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + }, + retcols=("device_id", "content"), + desc="_get_cached_devices_for_user", + ) + defer.returnValue({ + device["device_id"]: db_to_json(device["content"]) + for device in devices + }) + + def get_devices_with_keys_by_user(self, user_id): + """Get all devices (with any device keys) for a user + + Returns: + (stream_id, devices) + """ + return self.runInteraction( + "get_devices_with_keys_by_user", + self._get_devices_with_keys_by_user_txn, user_id, + ) + + def _get_devices_with_keys_by_user_txn(self, txn, user_id): + now_stream_id = self._device_list_id_gen.get_current_token() + + devices = self._get_e2e_device_keys_txn( + txn, [(user_id, None)], include_all_devices=True + ) + + if devices: + user_devices = devices[user_id] + results = [] + for device_id, device in iteritems(user_devices): + result = { + "device_id": device_id, + } + + key_json = device.get("key_json", None) + if key_json: + result["keys"] = db_to_json(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + + results.append(result) + + return now_stream_id, results + + return now_stream_id, [] + + @defer.inlineCallbacks + def get_user_whose_devices_changed(self, from_key): + """Get set of users whose devices have changed since `from_key`. + """ + from_key = int(from_key) + changed = self._device_list_stream_cache.get_all_entities_changed(from_key) + if changed is not None: + defer.returnValue(set(changed)) + + sql = """ + SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? + """ + rows = yield self._execute("get_user_whose_devices_changed", None, sql, from_key) + defer.returnValue(set(row[0] for row in rows)) + + def get_all_device_list_changes_for_remotes(self, from_key, to_key): + """Return a list of `(stream_id, user_id, destination)` which is the + combined list of changes to devices, and which destinations need to be + poked. `destination` may be None if no destinations need to be poked. + """ + # We do a group by here as there can be a large number of duplicate + # entries, since we throw away device IDs. + sql = """ + SELECT MAX(stream_id) AS stream_id, user_id, destination + FROM device_lists_stream + LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) + WHERE ? < stream_id AND stream_id <= ? + GROUP BY user_id, destination + """ + return self._execute( + "get_all_device_list_changes_for_remotes", None, + sql, from_key, to_key + ) + + @cached(max_entries=10000) + def get_device_list_last_stream_id_for_remote(self, user_id): + """Get the last stream_id we got for a user. May be None if we haven't + got any information for them. + """ + return self._simple_select_one_onecol( + table="device_lists_remote_extremeties", + keyvalues={"user_id": user_id}, + retcol="stream_id", + desc="get_device_list_last_stream_id_for_remote", + allow_none=True, + ) + + @cachedList(cached_method_name="get_device_list_last_stream_id_for_remote", + list_name="user_ids", inlineCallbacks=True) + def get_device_list_last_stream_id_for_remotes(self, user_ids): + rows = yield self._simple_select_many_batch( + table="device_lists_remote_extremeties", + column="user_id", + iterable=user_ids, + retcols=("user_id", "stream_id",), + desc="get_device_list_last_stream_id_for_remotes", + ) + + results = {user_id: None for user_id in user_ids} + results.update({ + row["user_id"]: row["stream_id"] for row in rows + }) + + defer.returnValue(results) + + +class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): def __init__(self, db_conn, hs): super(DeviceStore, self).__init__(db_conn, hs) @@ -121,24 +456,6 @@ class DeviceStore(BackgroundUpdateStore): initial_device_display_name, e) raise StoreError(500, "Problem storing device.") - def get_device(self, user_id, device_id): - """Retrieve a device. - - Args: - user_id (str): The ID of the user which owns the device - device_id (str): The ID of the device to retrieve - Returns: - defer.Deferred for a dict containing the device information - Raises: - StoreError: if the device is not found - """ - return self._simple_select_one( - table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, - retcols=("user_id", "device_id", "display_name"), - desc="get_device", - ) - @defer.inlineCallbacks def delete_device(self, user_id, device_id): """Delete a device. @@ -203,57 +520,6 @@ class DeviceStore(BackgroundUpdateStore): ) @defer.inlineCallbacks - def get_devices_by_user(self, user_id): - """Retrieve all of a user's registered devices. - - Args: - user_id (str): - Returns: - defer.Deferred: resolves to a dict from device_id to a dict - containing "device_id", "user_id" and "display_name" for each - device. - """ - devices = yield self._simple_select_list( - table="devices", - keyvalues={"user_id": user_id}, - retcols=("user_id", "device_id", "display_name"), - desc="get_devices_by_user" - ) - - defer.returnValue({d["device_id"]: d for d in devices}) - - @cached(max_entries=10000) - def get_device_list_last_stream_id_for_remote(self, user_id): - """Get the last stream_id we got for a user. May be None if we haven't - got any information for them. - """ - return self._simple_select_one_onecol( - table="device_lists_remote_extremeties", - keyvalues={"user_id": user_id}, - retcol="stream_id", - desc="get_device_list_remote_extremity", - allow_none=True, - ) - - @cachedList(cached_method_name="get_device_list_last_stream_id_for_remote", - list_name="user_ids", inlineCallbacks=True) - def get_device_list_last_stream_id_for_remotes(self, user_ids): - rows = yield self._simple_select_many_batch( - table="device_lists_remote_extremeties", - column="user_id", - iterable=user_ids, - retcols=("user_id", "stream_id",), - desc="get_user_devices_from_cache", - ) - - results = {user_id: None for user_id in user_ids} - results.update({ - row["user_id"]: row["stream_id"] for row in rows - }) - - defer.returnValue(results) - - @defer.inlineCallbacks def mark_remote_user_device_list_as_unsubscribed(self, user_id): """Mark that we no longer track device lists for remote user. """ @@ -405,268 +671,6 @@ class DeviceStore(BackgroundUpdateStore): lock=False, ) - def get_devices_by_remote(self, destination, from_stream_id): - """Get stream of updates to send to remote servers - - Returns: - (int, list[dict]): current stream id and list of updates - """ - now_stream_id = self._device_list_id_gen.get_current_token() - - has_changed = self._device_list_federation_stream_cache.has_entity_changed( - destination, int(from_stream_id) - ) - if not has_changed: - return (now_stream_id, []) - - return self.runInteraction( - "get_devices_by_remote", self._get_devices_by_remote_txn, - destination, from_stream_id, now_stream_id, - ) - - def _get_devices_by_remote_txn(self, txn, destination, from_stream_id, - now_stream_id): - sql = """ - SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes - WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? - GROUP BY user_id, device_id - LIMIT 20 - """ - txn.execute( - sql, (destination, from_stream_id, now_stream_id, False) - ) - - # maps (user_id, device_id) -> stream_id - query_map = {(r[0], r[1]): r[2] for r in txn} - if not query_map: - return (now_stream_id, []) - - if len(query_map) >= 20: - now_stream_id = max(stream_id for stream_id in itervalues(query_map)) - - devices = self._get_e2e_device_keys_txn( - txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True - ) - - prev_sent_id_sql = """ - SELECT coalesce(max(stream_id), 0) as stream_id - FROM device_lists_outbound_last_success - WHERE destination = ? AND user_id = ? AND stream_id <= ? - """ - - results = [] - for user_id, user_devices in iteritems(devices): - # The prev_id for the first row is always the last row before - # `from_stream_id` - txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) - rows = txn.fetchall() - prev_id = rows[0][0] - for device_id, device in iteritems(user_devices): - stream_id = query_map[(user_id, device_id)] - result = { - "user_id": user_id, - "device_id": device_id, - "prev_id": [prev_id] if prev_id else [], - "stream_id": stream_id, - } - - prev_id = stream_id - - if device is not None: - key_json = device.get("key_json", None) - if key_json: - result["keys"] = db_to_json(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name - else: - result["deleted"] = True - - results.append(result) - - return (now_stream_id, results) - - @defer.inlineCallbacks - def get_user_devices_from_cache(self, query_list): - """Get the devices (and keys if any) for remote users from the cache. - - Args: - query_list(list): List of (user_id, device_ids), if device_ids is - falsey then return all device ids for that user. - - Returns: - (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is - a set of user_ids and results_map is a mapping of - user_id -> device_id -> device_info - """ - user_ids = set(user_id for user_id, _ in query_list) - user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids)) - user_ids_in_cache = set( - user_id for user_id, stream_id in user_map.items() if stream_id - ) - user_ids_not_in_cache = user_ids - user_ids_in_cache - - results = {} - for user_id, device_id in query_list: - if user_id not in user_ids_in_cache: - continue - - if device_id: - device = yield self._get_cached_user_device(user_id, device_id) - results.setdefault(user_id, {})[device_id] = device - else: - results[user_id] = yield self._get_cached_devices_for_user(user_id) - - defer.returnValue((user_ids_not_in_cache, results)) - - @cachedInlineCallbacks(num_args=2, tree=True) - def _get_cached_user_device(self, user_id, device_id): - content = yield self._simple_select_one_onecol( - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - retcol="content", - desc="_get_cached_user_device", - ) - defer.returnValue(db_to_json(content)) - - @cachedInlineCallbacks() - def _get_cached_devices_for_user(self, user_id): - devices = yield self._simple_select_list( - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - }, - retcols=("device_id", "content"), - desc="_get_cached_devices_for_user", - ) - defer.returnValue({ - device["device_id"]: db_to_json(device["content"]) - for device in devices - }) - - def get_devices_with_keys_by_user(self, user_id): - """Get all devices (with any device keys) for a user - - Returns: - (stream_id, devices) - """ - return self.runInteraction( - "get_devices_with_keys_by_user", - self._get_devices_with_keys_by_user_txn, user_id, - ) - - def _get_devices_with_keys_by_user_txn(self, txn, user_id): - now_stream_id = self._device_list_id_gen.get_current_token() - - devices = self._get_e2e_device_keys_txn( - txn, [(user_id, None)], include_all_devices=True - ) - - if devices: - user_devices = devices[user_id] - results = [] - for device_id, device in iteritems(user_devices): - result = { - "device_id": device_id, - } - - key_json = device.get("key_json", None) - if key_json: - result["keys"] = db_to_json(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name - - results.append(result) - - return now_stream_id, results - - return now_stream_id, [] - - def mark_as_sent_devices_by_remote(self, destination, stream_id): - """Mark that updates have successfully been sent to the destination. - """ - return self.runInteraction( - "mark_as_sent_devices_by_remote", self._mark_as_sent_devices_by_remote_txn, - destination, stream_id, - ) - - def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): - # We update the device_lists_outbound_last_success with the successfully - # poked users. We do the join to see which users need to be inserted and - # which updated. - sql = """ - SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) - FROM device_lists_outbound_pokes as o - LEFT JOIN device_lists_outbound_last_success as s - USING (destination, user_id) - WHERE destination = ? AND o.stream_id <= ? - GROUP BY user_id - """ - txn.execute(sql, (destination, stream_id,)) - rows = txn.fetchall() - - sql = """ - UPDATE device_lists_outbound_last_success - SET stream_id = ? - WHERE destination = ? AND user_id = ? - """ - txn.executemany( - sql, ((row[1], destination, row[0],) for row in rows if row[2]) - ) - - sql = """ - INSERT INTO device_lists_outbound_last_success - (destination, user_id, stream_id) VALUES (?, ?, ?) - """ - txn.executemany( - sql, ((destination, row[0], row[1],) for row in rows if not row[2]) - ) - - # Delete all sent outbound pokes - sql = """ - DELETE FROM device_lists_outbound_pokes - WHERE destination = ? AND stream_id <= ? - """ - txn.execute(sql, (destination, stream_id,)) - - @defer.inlineCallbacks - def get_user_whose_devices_changed(self, from_key): - """Get set of users whose devices have changed since `from_key`. - """ - from_key = int(from_key) - changed = self._device_list_stream_cache.get_all_entities_changed(from_key) - if changed is not None: - defer.returnValue(set(changed)) - - sql = """ - SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? - """ - rows = yield self._execute("get_user_whose_devices_changed", None, sql, from_key) - defer.returnValue(set(row[0] for row in rows)) - - def get_all_device_list_changes_for_remotes(self, from_key, to_key): - """Return a list of `(stream_id, user_id, destination)` which is the - combined list of changes to devices, and which destinations need to be - poked. `destination` may be None if no destinations need to be poked. - """ - # We do a group by here as there can be a large number of duplicate - # entries, since we throw away device IDs. - sql = """ - SELECT MAX(stream_id) AS stream_id, user_id, destination - FROM device_lists_stream - LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) - WHERE ? < stream_id AND stream_id <= ? - GROUP BY user_id, destination - """ - return self._execute( - "get_all_device_list_changes_for_remotes", None, - sql, from_key, to_key - ) - @defer.inlineCallbacks def add_device_change_to_streams(self, user_id, device_ids, hosts): """Persist that a user's devices have been updated, and which hosts @@ -732,9 +736,6 @@ class DeviceStore(BackgroundUpdateStore): ] ) - def get_device_stream_token(self): - return self._device_list_id_gen.get_current_token() - def _prune_old_outbound_device_pokes(self): """Delete old entries out of the device_lists_outbound_pokes to ensure that we don't fill up due to dead servers. We keep one entry per diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 2a0f6cfca9..e381e472a2 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -23,49 +23,7 @@ from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore, db_to_json -class EndToEndKeyStore(SQLBaseStore): - def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys): - """Stores device keys for a device. Returns whether there was a change - or the keys were already in the database. - """ - def _set_e2e_device_keys_txn(txn): - old_key_json = self._simple_select_one_onecol_txn( - txn, - table="e2e_device_keys_json", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - retcol="key_json", - allow_none=True, - ) - - # In py3 we need old_key_json to match new_key_json type. The DB - # returns unicode while encode_canonical_json returns bytes. - new_key_json = encode_canonical_json(device_keys).decode("utf-8") - - if old_key_json == new_key_json: - return False - - self._simple_upsert_txn( - txn, - table="e2e_device_keys_json", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - values={ - "ts_added_ms": time_now, - "key_json": new_key_json, - } - ) - - return True - - return self.runInteraction( - "set_e2e_device_keys", _set_e2e_device_keys_txn - ) - +class EndToEndKeyWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, @@ -238,6 +196,50 @@ class EndToEndKeyStore(SQLBaseStore): "count_e2e_one_time_keys", _count_e2e_one_time_keys ) + +class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): + def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys): + """Stores device keys for a device. Returns whether there was a change + or the keys were already in the database. + """ + def _set_e2e_device_keys_txn(txn): + old_key_json = self._simple_select_one_onecol_txn( + txn, + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + retcol="key_json", + allow_none=True, + ) + + # In py3 we need old_key_json to match new_key_json type. The DB + # returns unicode while encode_canonical_json returns bytes. + new_key_json = encode_canonical_json(device_keys).decode("utf-8") + + if old_key_json == new_key_json: + return False + + self._simple_upsert_txn( + txn, + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "ts_added_ms": time_now, + "key_json": new_key_json, + } + ) + + return True + + return self.runInteraction( + "set_e2e_device_keys", _set_e2e_device_keys_txn + ) + def claim_e2e_one_time_keys(self, query_list): """Take a list of one time keys out of the database""" def _claim_e2e_one_time_keys(txn): diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 38809ed0fc..a8d90456e3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -442,6 +442,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, event_results.reverse() return event_results + @defer.inlineCallbacks + def get_successor_events(self, event_ids): + """Fetch all events that have the given events as a prev event + + Args: + event_ids (iterable[str]) + + Returns: + Deferred[list[str]] + """ + rows = yield self._simple_select_many_batch( + table="event_edges", + column="prev_event_id", + iterable=event_ids, + retcols=("event_id",), + desc="get_successor_events" + ) + + defer.returnValue([ + row["event_id"] for row in rows + ]) + class EventFederationStore(EventFederationWorkerStore): """ Responsible for storing and serving up the various graphs associated diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 06db9e56e6..428300ea0a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -537,6 +537,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore new_events = [ event for event, ctx in event_contexts if not event.internal_metadata.is_outlier() and not ctx.rejected + and not event.internal_metadata.is_soft_failed() ] # start with the existing forward extremities @@ -1406,21 +1407,6 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore values=state_values, ) - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": event.event_id, - "prev_event_id": prev_id, - "room_id": event.room_id, - "is_state": True, - } - for event, _ in state_events_and_contexts - for prev_id, _ in event.prev_state - ], - ) - # Prefill the event cache self._add_to_cache(txn, events_and_contexts) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 6a5028961d..4b8438c3e9 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -186,6 +186,63 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, defer.returnValue(results) @defer.inlineCallbacks + def move_push_rule_from_room_to_room( + self, new_room_id, user_id, rule, + ): + """Move a single push rule from one room to another for a specific user. + + Args: + new_room_id (str): ID of the new room. + user_id (str): ID of user the push rule belongs to. + rule (Dict): A push rule. + """ + # Create new rule id + rule_id_scope = '/'.join(rule["rule_id"].split('/')[:-1]) + new_rule_id = rule_id_scope + "/" + new_room_id + + # Change room id in each condition + for condition in rule.get("conditions", []): + if condition.get("key") == "room_id": + condition["pattern"] = new_room_id + + # Add the rule for the new room + yield self.add_push_rule( + user_id=user_id, + rule_id=new_rule_id, + priority_class=rule["priority_class"], + conditions=rule["conditions"], + actions=rule["actions"], + ) + + # Delete push rule for the old room + yield self.delete_push_rule(user_id, rule["rule_id"]) + + @defer.inlineCallbacks + def move_push_rules_from_room_to_room_for_user( + self, old_room_id, new_room_id, user_id, + ): + """Move all of the push rules from one room to another for a specific + user. + + Args: + old_room_id (str): ID of the old room. + new_room_id (str): ID of the new room. + user_id (str): ID of user to copy push rules for. + """ + # Retrieve push rules for this user + user_push_rules = yield self.get_push_rules_for_user(user_id) + + # Get rules relating to the old room, move them to the new room, then + # delete them from the old room + for rule in user_push_rules: + conditions = rule.get("conditions", []) + if any((c.get("key") == "room_id" and + c.get("pattern") == old_room_id) for c in conditions): + self.move_push_rule_from_room_to_room( + new_room_id, user_id, rule, + ) + + @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): state_group = context.state_group if not state_group: diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 0ac665e967..89a1f7e3d7 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -301,7 +301,9 @@ class ReceiptsWorkerStore(SQLBaseStore): args.append(limit) txn.execute(sql, args) - return txn.fetchall() + return ( + r[0:5] + (json.loads(r[5]), ) for r in txn + ) return self.runInteraction( "get_all_updated_receipts", get_all_updated_receipts_txn ) @@ -346,15 +348,23 @@ class ReceiptsStore(ReceiptsWorkerStore): def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): + """Inserts a read-receipt into the database if it's newer than the current RR + + Returns: int|None + None if the RR is older than the current RR + otherwise, the rx timestamp of the event that the RR corresponds to + (or 0 if the event is unknown) + """ res = self._simple_select_one_txn( txn, table="events", - retcols=["topological_ordering", "stream_ordering"], + retcols=["stream_ordering", "received_ts"], keyvalues={"event_id": event_id}, allow_none=True ) stream_ordering = int(res["stream_ordering"]) if res else None + rx_ts = res["received_ts"] if res else 0 # We don't want to clobber receipts for more recent events, so we # have to compare orderings of existing receipts @@ -373,7 +383,7 @@ class ReceiptsStore(ReceiptsWorkerStore): "one for later event %s", event_id, eid, ) - return False + return None txn.call_after( self.get_receipts_for_room.invalidate, (room_id, receipt_type) @@ -429,7 +439,7 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_ordering=stream_ordering, ) - return True + return rx_ts @defer.inlineCallbacks def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data): @@ -466,7 +476,7 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_id_manager = self._receipts_id_gen.get_next() with stream_id_manager as stream_id: - have_persisted = yield self.runInteraction( + event_ts = yield self.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, room_id, receipt_type, user_id, linearized_event_id, @@ -474,8 +484,14 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_id=stream_id, ) - if not have_persisted: - defer.returnValue(None) + if event_ts is None: + defer.returnValue(None) + + now = self._clock.time_msec() + logger.debug( + "RR for event %s in %s (%i ms old)", + linearized_event_id, room_id, now - event_ts, + ) yield self.insert_graph_receipt( room_id, receipt_type, user_id, event_ids, data diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9b9572890b..9b6c28892c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -295,6 +295,39 @@ class RegistrationWorkerStore(SQLBaseStore): return ret['user_id'] return None + @defer.inlineCallbacks + def user_add_threepid(self, user_id, medium, address, validated_at, added_at): + yield self._simple_upsert("user_threepids", { + "medium": medium, + "address": address, + }, { + "user_id": user_id, + "validated_at": validated_at, + "added_at": added_at, + }) + + @defer.inlineCallbacks + def user_get_threepids(self, user_id): + ret = yield self._simple_select_list( + "user_threepids", { + "user_id": user_id + }, + ['medium', 'address', 'validated_at', 'added_at'], + 'user_get_threepids' + ) + defer.returnValue(ret) + + def user_delete_threepid(self, user_id, medium, address): + return self._simple_delete( + "user_threepids", + keyvalues={ + "user_id": user_id, + "medium": medium, + "address": address, + }, + desc="user_delete_threepids", + ) + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -633,39 +666,6 @@ class RegistrationStore(RegistrationWorkerStore, defer.returnValue(res if res else False) @defer.inlineCallbacks - def user_add_threepid(self, user_id, medium, address, validated_at, added_at): - yield self._simple_upsert("user_threepids", { - "medium": medium, - "address": address, - }, { - "user_id": user_id, - "validated_at": validated_at, - "added_at": added_at, - }) - - @defer.inlineCallbacks - def user_get_threepids(self, user_id): - ret = yield self._simple_select_list( - "user_threepids", { - "user_id": user_id - }, - ['medium', 'address', 'validated_at', 'added_at'], - 'user_get_threepids' - ) - defer.returnValue(ret) - - def user_delete_threepid(self, user_id, medium, address): - return self._simple_delete( - "user_threepids", - keyvalues={ - "user_id": user_id, - "medium": medium, - "address": address, - }, - desc="user_delete_threepids", - ) - - @defer.inlineCallbacks def save_or_get_3pid_guest_access_token( self, medium, address, access_token, inviter_user_id ): diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 41c65e112a..a979d4860a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -500,10 +500,22 @@ class RoomStore(RoomWorkerStore, SearchStore): @defer.inlineCallbacks def block_room(self, room_id, user_id): - yield self._simple_insert( + """Marks the room as blocked. Can be called multiple times. + + Args: + room_id (str): Room to block + user_id (str): Who blocked it + + Returns: + Deferred + """ + yield self._simple_upsert( table="blocked_rooms", - values={ + keyvalues={ "room_id": room_id, + }, + values={}, + insertion_values={ "user_id": user_id, }, desc="block_room", diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql b/synapse/storage/schema/delta/53/user_dir_populate.sql new file mode 100644 index 0000000000..ffcc896b58 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_dir_populate.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_user_directory_createtables', '{}'); + +-- Run through each room and update the user directory according to who is in it +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); + +-- Insert all users, if search_all_users is on +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql new file mode 100644 index 0000000000..5831b1a6f8 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -0,0 +1,44 @@ +/* Copyright 2017 Vector Creations Ltd, 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Old disused version of the tables below. +DROP TABLE IF EXISTS users_who_share_rooms; + +-- Tables keeping track of what users share rooms. This is a map of local users +-- to local or remote users, per room. Remote users cannot be in the user_id +-- column, only the other_user_id column. There are two tables, one for public +-- rooms and those for private rooms. +CREATE TABLE IF NOT EXISTS users_who_share_public_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS users_who_share_private_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_who_share_public_rooms_u_idx ON users_who_share_public_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_public_rooms_r_idx ON users_who_share_public_rooms(room_id); +CREATE INDEX users_who_share_public_rooms_o_idx ON users_who_share_public_rooms(other_user_id); + +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); + +-- Make sure that we populate the tables initially by resetting the stream ID +UPDATE user_directory_stream_pos SET stream_id = NULL; diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql new file mode 100644 index 0000000000..f7827ca6d2 --- /dev/null +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We don't need the old version of this table. +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Old version of users_in_public_rooms +DROP TABLE IF EXISTS users_who_share_public_rooms; + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS users_in_public_rooms ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); diff --git a/synapse/storage/schema/full_schemas/11/event_edges.sql b/synapse/storage/schema/full_schemas/11/event_edges.sql index 52eec88357..bccd1c6f74 100644 --- a/synapse/storage/schema/full_schemas/11/event_edges.sql +++ b/synapse/storage/schema/full_schemas/11/event_edges.sql @@ -37,6 +37,8 @@ CREATE TABLE IF NOT EXISTS event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, + -- We no longer insert prev_state into this table, so all new rows will have + -- is_state as false. is_state BOOL NOT NULL, UNIQUE (event_id, prev_event_id, room_id, is_state) ); diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py new file mode 100644 index 0000000000..57bc45cdb9 --- /dev/null +++ b/synapse/storage/state_deltas.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 Vector Creations Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.storage._base import SQLBaseStore + +logger = logging.getLogger(__name__) + + +class StateDeltasStore(SQLBaseStore): + + def get_current_state_deltas(self, prev_stream_id): + prev_stream_id = int(prev_stream_id) + if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): + return [] + + def get_current_state_deltas_txn(txn): + # First we calculate the max stream id that will give us less than + # N results. + # We arbitarily limit to 100 stream_id entries to ensure we don't + # select toooo many. + sql = """ + SELECT stream_id, count(*) + FROM current_state_delta_stream + WHERE stream_id > ? + GROUP BY stream_id + ORDER BY stream_id ASC + LIMIT 100 + """ + txn.execute(sql, (prev_stream_id,)) + + total = 0 + max_stream_id = prev_stream_id + for max_stream_id, count in txn: + total += count + if total > 100: + # We arbitarily limit to 100 entries to ensure we don't + # select toooo many. + break + + # Now actually get the deltas + sql = """ + SELECT stream_id, room_id, type, state_key, event_id, prev_event_id + FROM current_state_delta_stream + WHERE ? < stream_id AND stream_id <= ? + ORDER BY stream_id ASC + """ + txn.execute(sql, (prev_stream_id, max_stream_id,)) + return self.cursor_to_dict(txn) + + return self.runInteraction( + "get_current_state_deltas", get_current_state_deltas_txn + ) + + def get_max_stream_id_in_current_state_deltas(self): + return self._simple_select_one_onecol( + table="current_state_delta_stream", + keyvalues={}, + retcol="COALESCE(MAX(stream_id), -1)", + desc="get_max_stream_id_in_current_state_deltas", + ) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index d6cfdba519..580fafeb3a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -191,6 +191,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): + """Get new room events in stream ordering since `from_key`. + + Args: + room_id (str) + from_key (str): Token from which no events are returned before + to_key (str): Token from which no events are returned after. (This + is typically the current stream token) + limit (int): Maximum number of events to return + order (str): Either "DESC" or "ASC". Determines which events are + returned when the result is limited. If "DESC" then the most + recent `limit` events are returned, otherwise returns the + oldest `limit` events. + + Returns: + Deferred[dict[str,tuple[list[FrozenEvent], str]]] + A map from room id to a tuple containing: + - list of recent events in the room + - stream ordering key for the start of the chunk of events returned. + """ from_id = RoomStreamToken.parse_stream_token(from_key).stream room_ids = yield self._events_stream_cache.get_entities_changed( diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index fea866c043..4d60a5726f 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -16,22 +16,314 @@ import logging import re -from six import iteritems - from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter +from synapse.storage.state_deltas import StateDeltasStore from synapse.types import get_domain_from_id, get_localpart_from_id -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks - -from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +TEMP_TABLE = "_temp_populate_user_directory" + + +class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): + + # How many records do we calculate before sending it to + # add_users_who_share_private_rooms? + SHARE_PRIVATE_WORKING_SET = 500 + + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + self.server_name = hs.hostname + + self.register_background_update_handler( + "populate_user_directory_createtables", + self._populate_user_directory_createtables, + ) + self.register_background_update_handler( + "populate_user_directory_process_rooms", + self._populate_user_directory_process_rooms, + ) + self.register_background_update_handler( + "populate_user_directory_process_users", + self._populate_user_directory_process_users, + ) + self.register_background_update_handler( + "populate_user_directory_cleanup", self._populate_user_directory_cleanup + ) + + @defer.inlineCallbacks + def _populate_user_directory_createtables(self, progress, batch_size): + + # Get all the rooms that we want to process. + def _make_staging_area(txn): + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" + ) + txn.execute(sql) + + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_position(position TEXT NOT NULL)" + ) + txn.execute(sql) + + # Get rooms we want to process from the database + sql = """ + SELECT room_id, count(*) FROM current_state_events + GROUP BY room_id + """ + txn.execute(sql) + rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] + self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) + del rooms + + # If search all users is on, get all the users we want to add. + if self.hs.config.user_directory_search_all_users: + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) + + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] + + self._simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + + new_pos = yield self.get_max_stream_id_in_current_state_deltas() + yield self.runInteraction( + "populate_user_directory_temp_build", _make_staging_area + ) + yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos}) + + yield self._end_background_update("populate_user_directory_createtables") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_cleanup(self, progress, batch_size): + """ + Update the user directory stream position, then clean up the old tables. + """ + position = yield self._simple_select_one_onecol( + TEMP_TABLE + "_position", None, "position" + ) + yield self.update_user_directory_stream_pos(position) + + def _delete_staging_area(txn): + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position") + + yield self.runInteraction( + "populate_user_directory_cleanup", _delete_staging_area + ) + + yield self._end_background_update("populate_user_directory_cleanup") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_process_rooms(self, progress, batch_size): + """ + Args: + progress (dict) + batch_size (int): Maximum number of state events to process + per cycle. + """ + state = self.hs.get_state_handler() + + # If we don't have progress filed, delete everything. + if not progress: + yield self.delete_all_from_user_dir() + + def _get_next_batch(txn): + # Only fetch 250 rooms, so we don't fetch too many at once, even + # if those 250 rooms have less than batch_size state events. + sql = """ + SELECT room_id, events FROM %s + ORDER BY events DESC + LIMIT 250 + """ % ( + TEMP_TABLE + "_rooms", + ) + txn.execute(sql) + rooms_to_work_on = txn.fetchall() + + if not rooms_to_work_on: + return None + + # Get how many are left to process, so we can give status on how + # far we are in processing + txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") + progress["remaining"] = txn.fetchone()[0] + + return rooms_to_work_on + + rooms_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more rooms -- complete the transaction. + if not rooms_to_work_on: + yield self._end_background_update("populate_user_directory_process_rooms") + defer.returnValue(1) + + logger.info( + "Processing the next %d rooms of %d remaining" + % (len(rooms_to_work_on), progress["remaining"]) + ) + + processed_event_count = 0 + + for room_id, event_count in rooms_to_work_on: + is_in_room = yield self.is_host_joined(room_id, self.server_name) + + if is_in_room: + is_public = yield self.is_room_world_readable_or_publicly_joinable( + room_id + ) + + users_with_profile = yield state.get_current_user_in_room(room_id) + user_ids = set(users_with_profile) + + # Update each user in the user directory. + for user_id, profile in users_with_profile.items(): + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + to_insert = set() + + if is_public: + for user_id in user_ids: + if self.get_if_app_services_interested_in_user(user_id): + continue + + to_insert.add(user_id) + + if to_insert: + yield self.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + for user_id in user_ids: + if not self.hs.is_mine_id(user_id): + continue + + if self.get_if_app_services_interested_in_user(user_id): + continue + + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + # If it gets too big, stop and write to the database + # to prevent storing too much in RAM. + if len(to_insert) >= self.SHARE_PRIVATE_WORKING_SET: + yield self.add_users_who_share_private_room( + room_id, to_insert + ) + to_insert.clear() + + if to_insert: + yield self.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() + + # We've finished a room. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_rooms", + progress, + ) + + processed_event_count += event_count + + if processed_event_count > batch_size: + # Don't process any more rooms, we've hit our batch size. + defer.returnValue(processed_event_count) + + defer.returnValue(processed_event_count) + + @defer.inlineCallbacks + def _populate_user_directory_process_users(self, progress, batch_size): + """ + If search_all_users is enabled, add all of the users to the user directory. + """ + if not self.hs.config.user_directory_search_all_users: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + def _get_next_batch(txn): + sql = "SELECT user_id FROM %s LIMIT %s" % ( + TEMP_TABLE + "_users", + str(batch_size), + ) + txn.execute(sql) + users_to_work_on = txn.fetchall() + + if not users_to_work_on: + return None + + users_to_work_on = [x[0] for x in users_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users" + txn.execute(sql) + progress["remaining"] = txn.fetchone()[0] + + return users_to_work_on + + users_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more users -- complete the transaction. + if not users_to_work_on: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + logger.info( + "Processing the next %d users of %d remaining" + % (len(users_to_work_on), progress["remaining"]) + ) + + for user_id in users_to_work_on: + profile = yield self.get_profileinfo(get_localpart_from_id(user_id)) + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + # We've finished processing a user. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_users", {"user_id": user_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_users", + progress, + ) + + defer.returnValue(len(users_to_work_on)) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -63,106 +355,16 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - @defer.inlineCallbacks - def add_users_to_public_room(self, room_id, user_ids): - """Add user to the list of users in public rooms - - Args: - room_id (str): A room_id that all users are in that is world_readable - or publically joinable - user_ids (list(str)): Users to add + def update_profile_in_user_dir(self, user_id, display_name, avatar_url): """ - yield self._simple_insert_many( - table="users_in_public_rooms", - values=[{"user_id": user_id, "room_id": room_id} for user_id in user_ids], - desc="add_users_to_public_room", - ) - for user_id in user_ids: - self.get_user_in_public_room.invalidate((user_id,)) - - def add_profiles_to_user_dir(self, room_id, users_with_profile): - """Add profiles to the user directory - - Args: - room_id (str): A room_id that all users are joined to - users_with_profile (dict): Users to add to directory in the form of - mapping of user_id -> ProfileInfo + Update or add a user's profile in the user directory. """ - if isinstance(self.database_engine, PostgresEngine): - # We weight the loclpart most highly, then display name and finally - # server name - sql = """ - INSERT INTO user_directory_search(user_id, vector) - VALUES (?, - setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - ) - """ - args = ( - ( - user_id, - get_localpart_from_id(user_id), - get_domain_from_id(user_id), - profile.display_name, - ) - for user_id, profile in iteritems(users_with_profile) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = """ - INSERT INTO user_directory_search(user_id, value) - VALUES (?,?) - """ - args = ( - ( - user_id, - "%s %s" % (user_id, p.display_name) if p.display_name else user_id, - ) - for user_id, p in iteritems(users_with_profile) - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - def _add_profiles_to_user_dir_txn(txn): - txn.executemany(sql, args) - self._simple_insert_many_txn( - txn, - table="user_directory", - values=[ - { - "user_id": user_id, - "room_id": room_id, - "display_name": profile.display_name, - "avatar_url": profile.avatar_url, - } - for user_id, profile in iteritems(users_with_profile) - ], - ) - for user_id in users_with_profile: - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - return self.runInteraction( - "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn - ) - - @defer.inlineCallbacks - def update_user_in_user_dir(self, user_id, room_id): - yield self._simple_update_one( - table="user_directory", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_user_dir", - ) - self.get_user_in_directory.invalidate((user_id,)) - - def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id): def _update_profile_in_user_dir_txn(txn): new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, - insertion_values={"room_id": room_id}, values={"display_name": display_name, "avatar_url": avatar_url}, lock=False, # We're only inserter ) @@ -250,16 +452,6 @@ class UserDirectoryStore(SQLBaseStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - @defer.inlineCallbacks - def update_user_in_public_user_list(self, user_id, room_id): - yield self._simple_update_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_public_user_list", - ) - self.get_user_in_public_room.invalidate((user_id,)) - def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( @@ -271,234 +463,154 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, + ) txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - txn.call_after(self.get_user_in_public_room.invalidate, (user_id,)) return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) @defer.inlineCallbacks - def remove_from_user_in_public_room(self, user_id): - yield self._simple_delete( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - desc="remove_from_user_in_public_room", - ) - self.get_user_in_public_room.invalidate((user_id,)) - - def get_users_in_public_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - return self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_public_due_to_room", - ) - - @defer.inlineCallbacks def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory because they're in the given room_id """ - user_ids_dir = yield self._simple_select_onecol( - table="user_directory", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_pub = yield self._simple_select_onecol( + user_ids_share_pub = yield self._simple_select_onecol( table="users_in_public_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", ) - user_ids_share = yield self._simple_select_onecol( - table="users_who_share_rooms", + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", keyvalues={"room_id": room_id}, - retcol="user_id", + retcol="other_user_id", desc="get_users_in_dir_due_to_room", ) - user_ids = set(user_ids_dir) - user_ids.update(user_ids_pub) - user_ids.update(user_ids_share) + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) defer.returnValue(user_ids) - @defer.inlineCallbacks - def get_all_rooms(self): - """Get all room_ids we've ever known about, in ascending order of "size" - """ - sql = """ - SELECT room_id FROM current_state_events - GROUP BY room_id - ORDER BY count(*) ASC - """ - rows = yield self._execute("get_all_rooms", None, sql) - defer.returnValue([room_id for room_id, in rows]) - - @defer.inlineCallbacks - def get_all_local_users(self): - """Get all local users - """ - sql = """ - SELECT name FROM users - """ - rows = yield self._execute("get_all_local_users", None, sql) - defer.returnValue([name for name, in rows]) - - def add_users_who_share_room(self, room_id, share_private, user_id_tuples): - """Insert entries into the users_who_share_rooms table. The first + def add_users_who_share_private_room(self, room_id, user_id_tuples): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _add_users_who_share_room_txn(txn): - self._simple_insert_many_txn( + self._simple_upsert_many_txn( txn, - table="users_who_share_rooms", - values=[ - { - "user_id": user_id, - "other_user_id": other_user_id, - "room_id": room_id, - "share_private": share_private, - } + table="users_who_share_private_rooms", + key_names=["user_id", "other_user_id", "room_id"], + key_values=[ + (user_id, other_user_id, room_id) for user_id, other_user_id in user_id_tuples ], + value_names=(), + value_values=None, ) - for user_id, other_user_id in user_id_tuples: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) - def update_users_who_share_room(self, room_id, share_private, user_id_sets): - """Updates entries in the users_who_share_rooms table. The first + def add_users_in_public_rooms(self, room_id, user_ids): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. + user_ids (list[str]) """ - def _update_users_who_share_room_txn(txn): - sql = """ - UPDATE users_who_share_rooms - SET room_id = ?, share_private = ? - WHERE user_id = ? AND other_user_id = ? - """ - txn.executemany( - sql, ((room_id, share_private, uid, oid) for uid, oid in user_id_sets) + def _add_users_in_public_rooms_txn(txn): + + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id", "room_id"], + key_values=[(user_id, room_id) for user_id in user_ids], + value_names=(), + value_values=None, ) - for user_id, other_user_id in user_id_sets: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) return self.runInteraction( - "update_users_who_share_room", _update_users_who_share_room_txn + "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) - def remove_user_who_share_room(self, user_id, other_user_id): - """Deletes entries in the users_who_share_rooms table. The first + def remove_user_who_share_room(self, user_id, room_id): + """ + Deletes entries in the users_who_share_*_rooms table. The first user should be a local user. Args: + user_id (str) room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id, "room_id": room_id}, ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) + self._simple_delete_txn( + txn, + table="users_in_public_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, ) return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cached(max_entries=500000) - def get_if_users_share_a_room(self, user_id, other_user_id): - """Gets if users share a room. - - Args: - user_id (str): Must be a local user_id - other_user_id (str) - - Returns: - bool|None: None if they don't share a room, otherwise whether they - share a private room or not. + @defer.inlineCallbacks + def get_user_dir_rooms_user_is_in(self, user_id): """ - return self._simple_select_one_onecol( - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, - retcol="share_private", - allow_none=True, - desc="get_if_users_share_a_room", - ) - - @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_users_who_share_room_from_dir(self, user_id): - """Returns the set of users who share a room with `user_id` + Returns the rooms that a user is in. Args: user_id(str): Must be a local user Returns: - dict: user_id -> share_private mapping + list: user_id """ - rows = yield self._simple_select_list( - table="users_who_share_rooms", + rows = yield self._simple_select_onecol( + table="users_who_share_private_rooms", keyvalues={"user_id": user_id}, - retcols=("other_user_id", "share_private"), - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) - defer.returnValue({row["other_user_id"]: row["share_private"] for row in rows}) - - def get_users_in_share_dir_with_room_id(self, user_id, room_id): - """Get all user tuples that are in the users_who_share_rooms due to the - given room_id. - - Returns: - [(user_id, other_user_id)]: where one of the two will match the given - user_id. - """ - sql = """ - SELECT user_id, other_user_id FROM users_who_share_rooms - WHERE room_id = ? AND (user_id = ? OR other_user_id = ?) - """ - return self._execute( - "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id + pub_rows = yield self._simple_select_onecol( + table="users_in_public_rooms", + keyvalues={"user_id": user_id}, + retcol="room_id", + desc="get_rooms_user_is_in", ) + users = set(pub_rows) + users.update(rows) + defer.returnValue(list(users)) + @defer.inlineCallbacks def get_rooms_in_common_for_users(self, user_id, other_user_id): """Given two user_ids find out the list of rooms they share. @@ -533,11 +645,8 @@ class UserDirectoryStore(SQLBaseStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_user_in_public_room.invalidate_all) - txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) - txn.call_after(self.get_if_users_share_a_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -548,21 +657,11 @@ class UserDirectoryStore(SQLBaseStore): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, - retcols=("room_id", "display_name", "avatar_url"), + retcols=("display_name", "avatar_url"), allow_none=True, desc="get_user_in_directory", ) - @cached() - def get_user_in_public_room(self, user_id): - return self._simple_select_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - retcols=("room_id",), - allow_none=True, - desc="get_user_in_public_room", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -579,59 +678,6 @@ class UserDirectoryStore(SQLBaseStore): desc="update_user_directory_stream_pos", ) - def get_current_state_deltas(self, prev_stream_id): - prev_stream_id = int(prev_stream_id) - if not self._curr_state_delta_stream_cache.has_any_entity_changed( - prev_stream_id - ): - return [] - - def get_current_state_deltas_txn(txn): - # First we calculate the max stream id that will give us less than - # N results. - # We arbitarily limit to 100 stream_id entries to ensure we don't - # select toooo many. - sql = """ - SELECT stream_id, count(*) - FROM current_state_delta_stream - WHERE stream_id > ? - GROUP BY stream_id - ORDER BY stream_id ASC - LIMIT 100 - """ - txn.execute(sql, (prev_stream_id,)) - - total = 0 - max_stream_id = prev_stream_id - for max_stream_id, count in txn: - total += count - if total > 100: - # We arbitarily limit to 100 entries to ensure we don't - # select toooo many. - break - - # Now actually get the deltas - sql = """ - SELECT stream_id, room_id, type, state_key, event_id, prev_event_id - FROM current_state_delta_stream - WHERE ? < stream_id AND stream_id <= ? - ORDER BY stream_id ASC - """ - txn.execute(sql, (prev_stream_id, max_stream_id)) - return self.cursor_to_dict(txn) - - return self.runInteraction( - "get_current_state_deltas", get_current_state_deltas_txn - ) - - def get_max_stream_id_in_current_state_deltas(self): - return self._simple_select_one_onecol( - table="current_state_delta_stream", - keyvalues={}, - retcol="COALESCE(MAX(stream_id), -1)", - desc="get_max_stream_id_in_current_state_deltas", - ) - @defer.inlineCallbacks def search_user_dir(self, user_id, search_term, limit): """Searches for users in directory @@ -652,22 +698,19 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # make s.user_id null to keep the ordering algorithm happy - join_clause = """ - CROSS JOIN (SELECT NULL as user_id) AS s - """ - join_args = () - where_clause = "1=1" + join_args = (user_id,) + where_clause = "user_id != ?" else: - join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) - LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_rooms - WHERE user_id = ? AND share_private - ) AS s USING (user_id) - """ join_args = (user_id,) - where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" + where_clause = """ + ( + EXISTS (select 1 from users_in_public_rooms WHERE user_id = t.user_id) + OR EXISTS ( + SELECT 1 FROM users_who_share_private_rooms + WHERE user_id = ? AND other_user_id = t.user_id + ) + ) + """ if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -679,14 +722,13 @@ class UserDirectoryStore(SQLBaseStore): # search: (domain, _, display name, localpart) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND vector @@ to_tsquery('english', ?) ORDER BY - (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) + (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) * ( @@ -708,7 +750,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (full_query, exact_query, prefix_query, limit + 1) @@ -717,9 +758,8 @@ class UserDirectoryStore(SQLBaseStore): sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND value MATCH ? @@ -729,7 +769,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (search_query, limit + 1) diff --git a/synapse/types.py b/synapse/types.py index d8cb64addb..3de94b6335 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -16,6 +16,8 @@ import re import string from collections import namedtuple +import attr + from synapse.api.errors import SynapseError @@ -455,3 +457,13 @@ class ThirdPartyInstanceID( @classmethod def create(cls, appservice_id, network_id,): return cls(appservice_id=appservice_id, network_id=network_id) + + +@attr.s(slots=True) +class ReadReceipt(object): + """Information about a read-receipt""" + room_id = attr.ib() + receipt_type = attr.ib() + user_id = attr.ib() + event_ids = attr.ib() + data = attr.ib() diff --git a/synapse/visibility.py b/synapse/visibility.py index 0281a7c919..16c40cd74c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -67,6 +67,10 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, Returns: Deferred[list[synapse.events.EventBase]] """ + # Filter out events that have been soft failed so that we don't relay them + # to clients. + events = list(e for e in events if not e.internal_metadata.is_soft_failed()) + types = ( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), @@ -216,28 +220,36 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events): - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): + """Filter a list of events based on whether given server is allowed to + see them. - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. - if erased_senders[event.sender]: + Args: + store (DataStore) + server_name (str) + events (iterable[FrozenEvent]) + redact (bool): Whether to return a redacted version of the event, or + to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. + + Returns + Deferred[list[FrozenEvent]] + """ + + def is_sender_erased(event, erased_senders): + if erased_senders and erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - return prune_event(event) - - # state will be None if we decided we didn't need to filter by - # room membership. - if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -259,17 +271,17 @@ def filter_events_for_server(store, server_name, events): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - return prune_event(event) + return False - return event + return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( @@ -296,16 +308,31 @@ def filter_events_for_server(store, server_name, events): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) + # If there are no erased users then we can just return the given list + # of events without having to copy it. defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -361,7 +388,13 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in iteritems(event_to_state_ids) } - defer.returnValue([ - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) |