From d5275fc55f4edc42d1543825da2c13df63d96927 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Jan 2020 13:47:50 +0000 Subject: Propagate cache invalidates from workers to other workers. (#6748) Currently if a worker invalidates a cache it will be streamed to master, which then didn't forward those to other workers. --- synapse/replication/tcp/protocol.py | 2 +- synapse/replication/tcp/resource.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'synapse/replication') diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 131e5acb09..bc1482a9bb 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -459,7 +459,7 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): await self.streamer.on_remove_pusher(cmd.app_id, cmd.push_key, cmd.user_id) async def on_INVALIDATE_CACHE(self, cmd): - self.streamer.on_invalidate_cache(cmd.cache_func, cmd.keys) + await self.streamer.on_invalidate_cache(cmd.cache_func, cmd.keys) async def on_REMOTE_SERVER_UP(self, cmd: RemoteServerUpCommand): self.streamer.on_remote_server_up(cmd.data) diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 6ebf944f66..ce60ae2e07 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -17,7 +17,7 @@ import logging import random -from typing import List +from typing import Any, List from six import itervalues @@ -271,11 +271,14 @@ class ReplicationStreamer(object): self.notifier.on_new_replication_data() @measure_func("repl.on_invalidate_cache") - def on_invalidate_cache(self, cache_func, keys): + async def on_invalidate_cache(self, cache_func: str, keys: List[Any]): """The client has asked us to invalidate a cache """ invalidate_cache_counter.inc() - getattr(self.store, cache_func).invalidate(tuple(keys)) + + # We invalidate the cache locally, but then also stream that to other + # workers. + await self.store.invalidate_cache_and_stream(cache_func, tuple(keys)) @measure_func("repl.on_user_ip") async def on_user_ip( -- cgit 1.5.1 From e17a11066192354f6c6144135a14e7abe524f44c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Jan 2020 14:43:21 +0000 Subject: Detect unknown remote devices and mark cache as stale (#6776) We just mark the fact that the cache may be stale in the database for now. --- changelog.d/6776.misc | 1 + synapse/handlers/devicemessage.py | 57 +++++++++++++++++++++- synapse/handlers/federation.py | 20 ++++++++ synapse/replication/slave/storage/devices.py | 2 +- synapse/storage/data_stores/main/devices.py | 29 +++++++++-- .../delta/57/device_list_remote_cache_stale.sql | 25 ++++++++++ 6 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 changelog.d/6776.misc create mode 100644 synapse/storage/data_stores/main/schema/delta/57/device_list_remote_cache_stale.sql (limited to 'synapse/replication') diff --git a/changelog.d/6776.misc b/changelog.d/6776.misc new file mode 100644 index 0000000000..4f9a4ac7a5 --- /dev/null +++ b/changelog.d/6776.misc @@ -0,0 +1 @@ +Detect unknown remote devices and mark cache as stale. diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index 73b9e120f5..5c5fe77be2 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -14,6 +14,7 @@ # limitations under the License. import logging +from typing import Any, Dict from canonicaljson import json @@ -65,6 +66,9 @@ class DeviceMessageHandler(object): logger.warning("Request for keys for non-local user %s", user_id) raise SynapseError(400, "Not a user here") + if not by_device: + continue + messages_by_device = { device_id: { "content": message_content, @@ -73,8 +77,11 @@ class DeviceMessageHandler(object): } for device_id, message_content in by_device.items() } - if messages_by_device: - local_messages[user_id] = messages_by_device + local_messages[user_id] = messages_by_device + + yield self._check_for_unknown_devices( + message_type, sender_user_id, by_device + ) stream_id = yield self.store.add_messages_from_remote_to_device_inbox( origin, message_id, local_messages @@ -84,6 +91,52 @@ class DeviceMessageHandler(object): "to_device_key", stream_id, users=local_messages.keys() ) + @defer.inlineCallbacks + def _check_for_unknown_devices( + self, + message_type: str, + sender_user_id: str, + by_device: Dict[str, Dict[str, Any]], + ): + """Checks inbound device messages for unkown remote devices, and if + found marks the remote cache for the user as stale. + """ + + if message_type != "m.room_key_request": + return + + # Get the sending device IDs + requesting_device_ids = set() + for message_content in by_device.values(): + device_id = message_content.get("requesting_device_id") + requesting_device_ids.add(device_id) + + # Check if we are tracking the devices of the remote user. + room_ids = yield self.store.get_rooms_for_user(sender_user_id) + if not room_ids: + logger.info( + "Received device message from remote device we don't" + " share a room with: %s %s", + sender_user_id, + requesting_device_ids, + ) + return + + # If we are tracking check that we know about the sending + # devices. + cached_devices = yield self.store.get_cached_devices_for_user(sender_user_id) + + unknown_devices = requesting_device_ids - set(cached_devices) + if unknown_devices: + logger.info( + "Received device message from remote device not in our cache: %s %s", + sender_user_id, + unknown_devices, + ) + yield self.store.mark_remote_user_device_cache_as_stale(sender_user_id) + # TODO: Poke something to start trying to refetch user's + # keys. + @defer.inlineCallbacks def send_device_message(self, sender_user_id, message_type, messages): set_tag("number_of_messages", len(messages)) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 180f165a7a..a67020a259 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -742,6 +742,26 @@ class FederationHandler(BaseHandler): user = UserID.from_string(event.state_key) await self.user_joined_room(user, room_id) + # For encrypted messages we check that we know about the sending device, + # if we don't then we mark the device cache for that user as stale. + if event.type == EventTypes.Encryption: + device_id = event.content.get("device_id") + if device_id is not None: + cached_devices = await self.store.get_cached_devices_for_user( + event.sender + ) + if device_id not in cached_devices: + logger.info( + "Received event from remote device not in our cache: %s %s", + event.sender, + device_id, + ) + await self.store.mark_remote_user_device_cache_as_stale( + event.sender + ) + # TODO: Poke something to start trying to refetch user's + # keys. + @log_function async def backfill(self, dest, room_id, limit, extremities): """ Trigger a backfill request to `dest` for the given `room_id` diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index dc625e0d7a..1c77687eea 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -72,6 +72,6 @@ class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedSto destination, token ) - self._get_cached_devices_for_user.invalidate((user_id,)) + self.get_cached_devices_for_user.invalidate((user_id,)) self._get_cached_user_device.invalidate_many((user_id,)) self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) diff --git a/synapse/storage/data_stores/main/devices.py b/synapse/storage/data_stores/main/devices.py index f0a7962dd0..30bf66b2b6 100644 --- a/synapse/storage/data_stores/main/devices.py +++ b/synapse/storage/data_stores/main/devices.py @@ -457,7 +457,7 @@ class DeviceWorkerStore(SQLBaseStore): device = yield self._get_cached_user_device(user_id, device_id) results.setdefault(user_id, {})[device_id] = device else: - results[user_id] = yield self._get_cached_devices_for_user(user_id) + results[user_id] = yield self.get_cached_devices_for_user(user_id) set_tag("in_cache", results) set_tag("not_in_cache", user_ids_not_in_cache) @@ -475,12 +475,12 @@ class DeviceWorkerStore(SQLBaseStore): return db_to_json(content) @cachedInlineCallbacks() - def _get_cached_devices_for_user(self, user_id): + def get_cached_devices_for_user(self, user_id): devices = yield self.db.simple_select_list( table="device_lists_remote_cache", keyvalues={"user_id": user_id}, retcols=("device_id", "content"), - desc="_get_cached_devices_for_user", + desc="get_cached_devices_for_user", ) return { device["device_id"]: db_to_json(device["content"]) for device in devices @@ -641,6 +641,18 @@ class DeviceWorkerStore(SQLBaseStore): return results + def mark_remote_user_device_cache_as_stale(self, user_id: str): + """Records that the server has reason to believe the cache of the devices + for the remote users is out of date. + """ + return self.db.simple_upsert( + table="device_lists_remote_resync", + keyvalues={"user_id": user_id}, + values={}, + insertion_values={"added_ts": self._clock.time_msec()}, + desc="make_remote_user_device_cache_as_stale", + ) + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__(self, database: Database, db_conn, hs): @@ -887,7 +899,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id)) - txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) + txn.call_after(self.get_cached_devices_for_user.invalidate, (user_id,)) txn.call_after( self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,) ) @@ -902,6 +914,13 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): lock=False, ) + # If we're replacing the remote user's device list cache presumably + # we've done a full resync, so we remove the entry that says we need + # to resync + self.db.simple_delete_txn( + txn, table="device_lists_remote_resync", keyvalues={"user_id": user_id}, + ) + def update_remote_device_list_cache(self, user_id, devices, stream_id): """Replace the entire cache of the remote user's devices. @@ -942,7 +961,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ], ) - txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) + txn.call_after(self.get_cached_devices_for_user.invalidate, (user_id,)) txn.call_after(self._get_cached_user_device.invalidate_many, (user_id,)) txn.call_after( self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,) diff --git a/synapse/storage/data_stores/main/schema/delta/57/device_list_remote_cache_stale.sql b/synapse/storage/data_stores/main/schema/delta/57/device_list_remote_cache_stale.sql new file mode 100644 index 0000000000..c3b6de2099 --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/57/device_list_remote_cache_stale.sql @@ -0,0 +1,25 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Records whether the server thinks that the remote users cached device lists +-- may be out of date (e.g. if we have received a to device message from a +-- device we don't know about). +CREATE TABLE IF NOT EXISTS device_lists_remote_resync ( + user_id TEXT NOT NULL, + added_ts BIGINT NOT NULL +); + +CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync (user_id); +CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync (added_ts); -- cgit 1.5.1 From c3d4ad8afdbe181707451410100dec4817c2c01a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 Jan 2020 16:42:11 +0000 Subject: Fix sending server up commands from workers (#6811) Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/6811.bugfix | 1 + synapse/federation/transport/client.py | 5 ++++- synapse/federation/transport/server.py | 26 +++++++++++++++----------- synapse/replication/tcp/client.py | 4 ++++ synapse/server.pyi | 12 +++++++++++- tox.ini | 1 + 6 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 changelog.d/6811.bugfix (limited to 'synapse/replication') diff --git a/changelog.d/6811.bugfix b/changelog.d/6811.bugfix new file mode 100644 index 0000000000..361f2fc2e8 --- /dev/null +++ b/changelog.d/6811.bugfix @@ -0,0 +1 @@ +Fix waking up other workers when remote server is detected to have come back online. diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 198257414b..dc563538de 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +from typing import Any, Dict from six.moves import urllib @@ -352,7 +353,9 @@ class TransportLayerClient(object): else: path = _create_v1_path("/publicRooms") - args = {"include_all_networks": "true" if include_all_networks else "false"} + args = { + "include_all_networks": "true" if include_all_networks else "false" + } # type: Dict[str, Any] if third_party_instance_id: args["third_party_instance_id"] = (third_party_instance_id,) if limit: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index d8cf9ed299..125eadd796 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -18,6 +18,7 @@ import functools import logging import re +from typing import Optional, Tuple, Type from twisted.internet.defer import maybeDeferred @@ -267,6 +268,8 @@ class BaseFederationServlet(object): returned. """ + PATH = "" # Overridden in subclasses, the regex to match against the path. + REQUIRE_AUTH = True PREFIX = FEDERATION_V1_PREFIX # Allows specifying the API version @@ -347,9 +350,6 @@ class BaseFederationServlet(object): return response - # Extra logic that functools.wraps() doesn't finish - new_func.__self__ = func.__self__ - return new_func def register(self, server): @@ -824,7 +824,7 @@ class PublicRoomList(BaseFederationServlet): if not self.allow_access: raise FederationDeniedError(origin) - limit = int(content.get("limit", 100)) + limit = int(content.get("limit", 100)) # type: Optional[int] since_token = content.get("since", None) search_filter = content.get("filter", None) @@ -971,7 +971,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - result = await self.groups_handler.update_room_in_group( + result = await self.handler.update_room_in_group( group_id, requester_user_id, room_id, config_key, content ) @@ -1422,11 +1422,13 @@ FEDERATION_SERVLET_CLASSES = ( On3pidBindServlet, FederationVersionServlet, RoomComplexityServlet, -) +) # type: Tuple[Type[BaseFederationServlet], ...] -OPENID_SERVLET_CLASSES = (OpenIdUserInfo,) +OPENID_SERVLET_CLASSES = ( + OpenIdUserInfo, +) # type: Tuple[Type[BaseFederationServlet], ...] -ROOM_LIST_CLASSES = (PublicRoomList,) +ROOM_LIST_CLASSES = (PublicRoomList,) # type: Tuple[Type[PublicRoomList], ...] GROUP_SERVER_SERVLET_CLASSES = ( FederationGroupsProfileServlet, @@ -1447,17 +1449,19 @@ GROUP_SERVER_SERVLET_CLASSES = ( FederationGroupsAddRoomsServlet, FederationGroupsAddRoomsConfigServlet, FederationGroupsSettingJoinPolicyServlet, -) +) # type: Tuple[Type[BaseFederationServlet], ...] GROUP_LOCAL_SERVLET_CLASSES = ( FederationGroupsLocalInviteServlet, FederationGroupsRemoveLocalUserServlet, FederationGroupsBulkPublicisedServlet, -) +) # type: Tuple[Type[BaseFederationServlet], ...] -GROUP_ATTESTATION_SERVLET_CLASSES = (FederationGroupsRenewAttestaionServlet,) +GROUP_ATTESTATION_SERVLET_CLASSES = ( + FederationGroupsRenewAttestaionServlet, +) # type: Tuple[Type[BaseFederationServlet], ...] DEFAULT_SERVLET_GROUPS = ( "federation", diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index fc06a7b053..02ab5b66ea 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -31,6 +31,7 @@ from .commands import ( Command, FederationAckCommand, InvalidateCacheCommand, + RemoteServerUpCommand, RemovePusherCommand, UserIpCommand, UserSyncCommand, @@ -210,6 +211,9 @@ class ReplicationClientHandler(AbstractReplicationClientHandler): cmd = UserIpCommand(user_id, access_token, ip, user_agent, device_id, last_seen) self.send_command(cmd) + def send_remote_server_up(self, server: str): + self.send_command(RemoteServerUpCommand(server)) + def await_sync(self, data): """Returns a deferred that is resolved when we receive a SYNC command with given data. diff --git a/synapse/server.pyi b/synapse/server.pyi index 0731403047..90347ac23e 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -2,8 +2,8 @@ import twisted.internet import synapse.api.auth import synapse.config.homeserver +import synapse.crypto.keyring import synapse.federation.sender -import synapse.federation.transaction_queue import synapse.federation.transport.client import synapse.handlers import synapse.handlers.auth @@ -17,6 +17,7 @@ import synapse.handlers.room_member import synapse.handlers.set_password import synapse.http.client import synapse.notifier +import synapse.replication.tcp.client import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager import synapse.server_notices.server_notices_sender @@ -27,6 +28,9 @@ class HomeServer(object): @property def config(self) -> synapse.config.homeserver.HomeServerConfig: pass + @property + def hostname(self) -> str: + pass def get_auth(self) -> synapse.api.auth.Auth: pass def get_auth_handler(self) -> synapse.handlers.auth.AuthHandler: @@ -97,3 +101,9 @@ class HomeServer(object): pass def get_reactor(self) -> twisted.internet.base.ReactorBase: pass + def get_keyring(self) -> synapse.crypto.keyring.Keyring: + pass + def get_tcp_replication( + self, + ) -> synapse.replication.tcp.client.ReplicationClientHandler: + pass diff --git a/tox.ini b/tox.ini index 1d946a02ba..88ef12bebd 100644 --- a/tox.ini +++ b/tox.ini @@ -179,6 +179,7 @@ extras = all commands = mypy \ synapse/api \ synapse/config/ \ + synapse/federation/transport \ synapse/handlers/ui_auth \ synapse/logging/ \ synapse/module_api \ -- cgit 1.5.1