From b106080fb4d92a9969acc511a1276063d95b5287 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Feb 2021 16:22:21 +0000 Subject: Regenerate exact thumbnails if missing --- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 49 +++++++++++++++++++++- synapse/storage/databases/main/media_repository.py | 18 ++++---- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a0162d4255..3375455c43 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -509,7 +509,7 @@ class MediaRepository: t_height: int, t_method: str, t_type: str, - url_cache: str, + url_cache: Optional[str], ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( FileInfo(None, media_id, url_cache=url_cache) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index d653a58be9..c345ec6498 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -114,6 +114,7 @@ class ThumbnailResource(DirectServeJsonResource): m_type, thumbnail_infos, media_id, + media_id, url_cache=media_info["url_cache"], server_name=None, ) @@ -269,6 +270,7 @@ class ThumbnailResource(DirectServeJsonResource): method, m_type, thumbnail_infos, + media_id, media_info["filesystem_id"], url_cache=None, server_name=server_name, @@ -282,6 +284,7 @@ class ThumbnailResource(DirectServeJsonResource): desired_method: str, desired_type: str, thumbnail_infos: List[Dict[str, Any]], + media_id: str, file_id: str, url_cache: Optional[str] = None, server_name: Optional[str] = None, @@ -316,9 +319,53 @@ class ThumbnailResource(DirectServeJsonResource): respond_404(request) return + responder = await self.media_storage.fetch_media(file_info) + if responder: + await respond_with_responder( + request, + responder, + file_info.thumbnail_type, + file_info.thumbnail_length, + ) + return + + # If we can't find the thumbnail we regenerate it. This can happen + # if e.g. we've deleted the thumbnails but still have the original + # image somewhere. + # + # Since we have an entry for the thumbnail in the DB we a) know we + # have have successfully generated the thumbnail in the past (so we + # don't need to worry about repeatedly failing to generate + # thumbnails), and b) have already calculated that appropriate + # width/height/method so we can just call the "generate exact" + # methods. + + if server_name: + await self.media_repo.generate_remote_exact_thumbnail( + server_name, + file_id=file_id, + media_id=media_id, + t_width=file_info.thumbnail_width, + t_height=file_info.thumbnail_height, + t_method=file_info.thumbnail_method, + t_type=file_info.thumbnail_type, + ) + else: + await self.media_repo.generate_local_exact_thumbnail( + media_id=media_id, + t_width=file_info.thumbnail_width, + t_height=file_info.thumbnail_height, + t_method=file_info.thumbnail_method, + t_type=file_info.thumbnail_type, + url_cache=url_cache, + ) + responder = await self.media_storage.fetch_media(file_info) await respond_with_responder( - request, responder, file_info.thumbnail_type, file_info.thumbnail_length + request, + responder, + file_info.thumbnail_type, + file_info.thumbnail_length, ) else: logger.info("Failed to find any generated thumbnails") diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index a0313c3ccf..9ee642c668 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -344,16 +344,16 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): thumbnail_method, thumbnail_length, ): - await self.db_pool.simple_insert( - "local_media_repository_thumbnails", - { + await self.db_pool.simple_upsert( + table="local_media_repository_thumbnails", + keyvalues={ "media_id": media_id, "thumbnail_width": thumbnail_width, "thumbnail_height": thumbnail_height, "thumbnail_method": thumbnail_method, "thumbnail_type": thumbnail_type, - "thumbnail_length": thumbnail_length, }, + values={"thumbnail_length": thumbnail_length}, desc="store_local_thumbnail", ) @@ -498,18 +498,18 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): thumbnail_method, thumbnail_length, ): - await self.db_pool.simple_insert( - "remote_media_cache_thumbnails", - { + await self.db_pool.simple_upsert( + table="remote_media_cache_thumbnails", + keyvalues={ "media_origin": origin, "media_id": media_id, "thumbnail_width": thumbnail_width, "thumbnail_height": thumbnail_height, "thumbnail_method": thumbnail_method, "thumbnail_type": thumbnail_type, - "thumbnail_length": thumbnail_length, - "filesystem_id": filesystem_id, }, + values={"thumbnail_length": thumbnail_length}, + insertion_values={"filesystem_id": filesystem_id}, desc="store_remote_media_thumbnail", ) -- cgit 1.4.1 From 2d577283aba3524f8e7cd9e1db75f87524373f6b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Feb 2021 16:53:00 +0000 Subject: Newsfile --- changelog.d/9438.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9438.feature diff --git a/changelog.d/9438.feature b/changelog.d/9438.feature new file mode 100644 index 0000000000..a1f6be5563 --- /dev/null +++ b/changelog.d/9438.feature @@ -0,0 +1 @@ +Add support for regenerating thumbnails if they have been deleted but the original image is still stored. -- cgit 1.4.1 From 9bc74743d53f4795b8bbd674d2945962a9bb29f9 Mon Sep 17 00:00:00 2001 From: AndrewFerr Date: Fri, 19 Feb 2021 04:50:41 -0500 Subject: Add configs to make profile data more private (#9203) Add off-by-default configuration settings to: - disable putting an invitee's profile info in invite events - disable profile lookup via federation Signed-off-by: Andrew Ferrazzutti --- changelog.d/9203.feature | 1 + docs/sample_config.yaml | 14 ++++++++++++++ synapse/config/federation.py | 10 ++++++++++ synapse/config/server.py | 14 ++++++++++++++ synapse/federation/transport/server.py | 7 +++---- synapse/handlers/message.py | 8 +++++++- synapse/handlers/profile.py | 10 ++++++++++ synapse/replication/http/federation.py | 3 ++- tests/handlers/test_profile.py | 6 +++++- 9 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 changelog.d/9203.feature diff --git a/changelog.d/9203.feature b/changelog.d/9203.feature new file mode 100644 index 0000000000..36b66a47a8 --- /dev/null +++ b/changelog.d/9203.feature @@ -0,0 +1 @@ +Add some configuration settings to make users' profile data more private. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 52380dfb04..e5615f961b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -101,6 +101,14 @@ pid_file: DATADIR/homeserver.pid # #limit_profile_requests_to_users_who_share_rooms: true +# Uncomment to prevent a user's profile data from being retrieved and +# displayed in a room until they have joined it. By default, a user's +# profile data is included in an invite event, regardless of the values +# of the above two settings, and whether or not the users share a server. +# Defaults to 'true'. +# +#include_profile_data_on_invite: false + # If set to 'true', removes the need for authentication to access the server's # public rooms directory through the client API, meaning that anyone can # query the room directory. Defaults to 'false'. @@ -699,6 +707,12 @@ acme: # - matrix.org # - example.com +# Uncomment to disable profile lookup over federation. By default, the +# Federation API allows other homeservers to obtain profile data of any user +# on this homeserver. Defaults to 'true'. +# +#allow_profile_lookup_over_federation: false + ## Caching ## diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 9f3c57e6a1..55e4db5442 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -41,6 +41,10 @@ class FederationConfig(Config): ) self.federation_metrics_domains = set(federation_metrics_domains) + self.allow_profile_lookup_over_federation = config.get( + "allow_profile_lookup_over_federation", True + ) + def generate_config_section(self, config_dir_path, server_name, **kwargs): return """\ ## Federation ## @@ -66,6 +70,12 @@ class FederationConfig(Config): #federation_metrics_domains: # - matrix.org # - example.com + + # Uncomment to disable profile lookup over federation. By default, the + # Federation API allows other homeservers to obtain profile data of any user + # on this homeserver. Defaults to 'true'. + # + #allow_profile_lookup_over_federation: false """ diff --git a/synapse/config/server.py b/synapse/config/server.py index 6f3325ff81..0bfd4398e2 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -263,6 +263,12 @@ class ServerConfig(Config): False, ) + # Whether to retrieve and display profile data for a user when they + # are invited to a room + self.include_profile_data_on_invite = config.get( + "include_profile_data_on_invite", True + ) + if "restrict_public_rooms_to_local_users" in config and ( "allow_public_rooms_without_auth" in config or "allow_public_rooms_over_federation" in config @@ -848,6 +854,14 @@ class ServerConfig(Config): # #limit_profile_requests_to_users_who_share_rooms: true + # Uncomment to prevent a user's profile data from being retrieved and + # displayed in a room until they have joined it. By default, a user's + # profile data is included in an invite event, regardless of the values + # of the above two settings, and whether or not the users share a server. + # Defaults to 'true'. + # + #include_profile_data_on_invite: false + # If set to 'true', removes the need for authentication to access the server's # public rooms directory through the client API, meaning that anyone can # query the room directory. Defaults to 'false'. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index cce83704d4..2cf935f38d 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -484,10 +484,9 @@ class FederationQueryServlet(BaseFederationServlet): # This is when we receive a server-server Query async def on_GET(self, origin, content, query, query_type): - return await self.handler.on_query_request( - query_type, - {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()}, - ) + args = {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()} + args["origin"] = origin + return await self.handler.on_query_request(query_type, args) class FederationMakeJoinServlet(BaseFederationServlet): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c03f6c997b..1b7c065b34 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -387,6 +387,12 @@ class EventCreationHandler: self.room_invite_state_types = self.hs.config.room_invite_state_types + self.membership_types_to_include_profile_data_in = ( + {Membership.JOIN, Membership.INVITE} + if self.hs.config.include_profile_data_on_invite + else {Membership.JOIN} + ) + self.send_event = ReplicationSendEventRestServlet.make_client(hs) # This is only used to get at ratelimit function, and maybe_kick_guest_users @@ -500,7 +506,7 @@ class EventCreationHandler: membership = builder.content.get("membership", None) target = UserID.from_string(builder.state_key) - if membership in {Membership.JOIN, Membership.INVITE}: + if membership in self.membership_types_to_include_profile_data_in: # If event doesn't include a display name, add one. profile = self.profile_handler content = builder.content diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 2f62d84fb5..d933dd3f01 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -310,6 +310,16 @@ class ProfileHandler(BaseHandler): await self._update_join_states(requester, target_user) async def on_profile_query(self, args: JsonDict) -> JsonDict: + """Handles federation profile query requests. + """ + + if not self.hs.config.allow_profile_lookup_over_federation: + raise SynapseError( + 403, + "Profile lookup over federation is disabled on this homeserver", + Codes.FORBIDDEN, + ) + user = UserID.from_string(args["user_id"]) if not self.hs.is_mine(user): raise SynapseError(400, "User is not hosted on this homeserver") diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 7a0dbb5b1a..8af53b4f28 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -213,8 +213,9 @@ class ReplicationGetQueryRestServlet(ReplicationEndpoint): content = parse_json_object_from_request(request) args = content["args"] + args["origin"] = content["origin"] - logger.info("Got %r query", query_type) + logger.info("Got %r query from %s", query_type, args["origin"]) result = await self.registry.on_query(query_type, args) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 18ca8b84f5..75c6a4e21c 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -161,7 +161,11 @@ class ProfileTestCase(unittest.HomeserverTestCase): response = self.get_success( self.query_handlers["profile"]( - {"user_id": "@caroline:test", "field": "displayname"} + { + "user_id": "@caroline:test", + "field": "displayname", + "origin": "servername.tld", + } ) ) -- cgit 1.4.1 From 3d2acc930f0a633bf400c96f3e636b5b662a54cb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Feb 2021 10:46:18 +0000 Subject: Return a 404 if we don't have the original file --- synapse/rest/media/v1/media_storage.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 1057e638be..b1b1c9e6ec 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -244,7 +244,7 @@ class MediaStorage: await consumer.wait() return local_path - raise Exception("file could not be found") + raise NotFoundError() def _file_info_to_path(self, file_info: FileInfo) -> str: """Converts file_info into a relative path. diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index c345ec6498..3ab90e9f9b 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -340,6 +340,13 @@ class ThumbnailResource(DirectServeJsonResource): # width/height/method so we can just call the "generate exact" # methods. + # First let's check that we do actually have the original image + # still. This will throw a 404 if we don't. + # TODO: We should refetch the thumbnails for remote media. + await self.media_storage.ensure_media_is_in_local_cache( + FileInfo(server_name, file_id, url_cache=url_cache) + ) + if server_name: await self.media_repo.generate_remote_exact_thumbnail( server_name, -- cgit 1.4.1 From 13e9029f447e724016b904a1c847de2ad4736d66 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 19 Feb 2021 11:02:03 +0000 Subject: Add a config option to prioritise local users in user directory search results (#9383) This PR adds a homeserver config option, `user_directory.prefer_local_users`, that when enabled will show local users higher in user directory search results than remote users. This option is off by default. Note that turning this on doesn't necessarily mean that remote users will always be put below local users, but they should be assuming all other ranking factors (search query match, profile information present etc) are identical. This is useful for, say, University networks that are openly federating, but want to prioritise local students and staff in the user directory over other random users. --- changelog.d/9383.feature | 1 + docs/sample_config.yaml | 5 ++ synapse/config/user_directory.py | 9 +++ synapse/storage/databases/main/user_directory.py | 59 ++++++++++++--- tests/handlers/test_user_directory.py | 94 ++++++++++++++++++++++++ 5 files changed, 159 insertions(+), 9 deletions(-) create mode 100644 changelog.d/9383.feature diff --git a/changelog.d/9383.feature b/changelog.d/9383.feature new file mode 100644 index 0000000000..8957c9cc5e --- /dev/null +++ b/changelog.d/9383.feature @@ -0,0 +1 @@ +Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index e5615f961b..c660c62620 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2554,9 +2554,14 @@ spam_checker: # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # +# 'prefer_local_users' defines whether to prioritise local users in +# search query results. If True, local users are more likely to appear above +# remote users when searching the user directory. Defaults to false. +# #user_directory: # enabled: true # search_all_users: false +# prefer_local_users: false # User Consent configuration diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index c8d19c5d6b..89dbebd148 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -26,6 +26,7 @@ class UserDirectoryConfig(Config): def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False + self.user_directory_search_prefer_local_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_search_enabled = user_directory_config.get( @@ -34,6 +35,9 @@ class UserDirectoryConfig(Config): self.user_directory_search_all_users = user_directory_config.get( "search_all_users", False ) + self.user_directory_search_prefer_local_users = user_directory_config.get( + "prefer_local_users", False + ) def generate_config_section(self, config_dir_path, server_name, **kwargs): return """ @@ -49,7 +53,12 @@ class UserDirectoryConfig(Config): # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # + # 'prefer_local_users' defines whether to prioritise local users in + # search query results. If True, local users are more likely to appear above + # remote users when searching the user directory. Defaults to false. + # #user_directory: # enabled: true # search_all_users: false + # prefer_local_users: false """ diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 63f88eac51..02ee15676c 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -556,6 +556,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + self._prefer_local_users_in_search = ( + hs.config.user_directory_search_prefer_local_users + ) + self._server_name = hs.config.server_name + async def remove_from_user_dir(self, user_id: str) -> None: def _remove_from_user_dir_txn(txn): self.db_pool.simple_delete_txn( @@ -754,9 +759,24 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): ) """ + # We allow manipulating the ranking algorithm by injecting statements + # based on config options. + additional_ordering_statements = [] + ordering_arguments = () + if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prefer_local_users_in_search: + # This statement checks whether a given user's user ID contains a server name + # that matches the local server + statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)" + additional_ordering_statements.append(statement) + + ordering_arguments += ("%:" + self._server_name,) + # We order by rank and then if they have profile info # The ranking algorithm is hand tweaked for "best" results. Broadly # the idea is we give a higher weight to exact matches. @@ -767,7 +787,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) WHERE - %s + %(where_clause)s AND vector @@ to_tsquery('simple', ?) ORDER BY (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) @@ -787,33 +807,54 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): 8 ) ) + %(order_case_statements)s DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( - where_clause, + """ % { + "where_clause": where_clause, + "order_case_statements": " ".join(additional_ordering_statements), + } + args = ( + join_args + + (full_query, exact_query, prefix_query) + + ordering_arguments + + (limit + 1,) ) - args = join_args + (full_query, exact_query, prefix_query, limit + 1) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prefer_local_users_in_search: + # This statement checks whether a given user's user ID contains a server name + # that matches the local server + # + # Note that we need to include a comma at the end for valid SQL + statement = "user_id LIKE ? DESC," + additional_ordering_statements.append(statement) + + ordering_arguments += ("%:" + self._server_name,) + sql = """ SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) WHERE - %s + %(where_clause)s AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, + %(order_statements)s display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( - where_clause, - ) - args = join_args + (search_query, limit + 1) + """ % { + "where_clause": where_clause, + "order_statements": " ".join(additional_ordering_statements), + } + args = join_args + (search_query,) + ordering_arguments + (limit + 1,) else: # This should be unreachable. raise Exception("Unrecognized database engine") diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 3572e54c5d..8dddc12204 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -18,6 +18,7 @@ from twisted.internet import defer import synapse.rest.admin from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes +from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.rest.client.v1 import login, room from synapse.rest.client.v2_alpha import user_directory from synapse.storage.roommember import ProfileInfo @@ -46,6 +47,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() self.handler = hs.get_user_directory_handler() + self.event_builder_factory = self.hs.get_event_builder_factory() + self.event_creation_handler = self.hs.get_event_creation_handler() def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" @@ -547,6 +550,97 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): s = self.get_success(self.handler.search_users(u1, u4, 10)) self.assertEqual(len(s["results"]), 1) + @override_config( + { + "user_directory": { + "enabled": True, + "search_all_users": True, + "prefer_local_users": True, + } + } + ) + def test_prefer_local_users(self): + """Tests that local users are shown higher in search results when + user_directory.prefer_local_users is True. + """ + # Create a room and few users to test the directory with + searching_user = self.register_user("searcher", "password") + searching_user_tok = self.login("searcher", "password") + + room_id = self.helper.create_room_as( + searching_user, + room_version=RoomVersions.V1.identifier, + tok=searching_user_tok, + ) + + # Create a few local users and join them to the room + local_user_1 = self.register_user("user_xxxxx", "password") + local_user_2 = self.register_user("user_bbbbb", "password") + local_user_3 = self.register_user("user_zzzzz", "password") + + self._add_user_to_room(room_id, RoomVersions.V1, local_user_1) + self._add_user_to_room(room_id, RoomVersions.V1, local_user_2) + self._add_user_to_room(room_id, RoomVersions.V1, local_user_3) + + # Create a few "remote" users and join them to the room + remote_user_1 = "@user_aaaaa:remote_server" + remote_user_2 = "@user_yyyyy:remote_server" + remote_user_3 = "@user_ccccc:remote_server" + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_1) + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_2) + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_3) + + local_users = [local_user_1, local_user_2, local_user_3] + remote_users = [remote_user_1, remote_user_2, remote_user_3] + + # Populate the user directory via background update + self._add_background_updates() + while not self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ): + self.get_success( + self.store.db_pool.updates.do_next_background_update(100), by=0.1 + ) + + # The local searching user searches for the term "user", which other users have + # in their user id + results = self.get_success( + self.handler.search_users(searching_user, "user", 20) + )["results"] + received_user_id_ordering = [result["user_id"] for result in results] + + # Typically we'd expect Synapse to return users in lexicographical order, + # assuming they have similar User IDs/display names, and profile information. + + # Check that the order of returned results using our module is as we expect, + # i.e our local users show up first, despite all users having lexographically mixed + # user IDs. + [self.assertIn(user, local_users) for user in received_user_id_ordering[:3]] + [self.assertIn(user, remote_users) for user in received_user_id_ordering[3:]] + + def _add_user_to_room( + self, room_id: str, room_version: RoomVersion, user_id: str, + ): + # Add a user to the room. + builder = self.event_builder_factory.for_room_version( + room_version, + { + "type": "m.room.member", + "sender": user_id, + "state_key": user_id, + "room_id": room_id, + "content": {"membership": "join"}, + }, + ) + + event, context = self.get_success( + self.event_creation_handler.create_new_client_event(builder) + ) + + self.get_success( + self.hs.get_storage().persistence.persist_event(event, context) + ) + class TestUserDirSearchDisabled(unittest.HomeserverTestCase): user_id = "@test:test" -- cgit 1.4.1 From 8bcfc2eaad66aa6e92a745d226fee4a81a921541 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 19 Feb 2021 11:37:29 +0000 Subject: Be smarter about which hosts to send presence to when processing room joins (#9402) This PR attempts to eliminate unnecessary presence sending work when your local server joins a room, or when a remote server joins a room your server is participating in by processing state deltas in chunks rather than individually. --- When your server joins a room for the first time, it requests the historical state as well. This chunk of new state is passed to the presence handler which, after filtering that state down to only membership joins, will send presence updates to homeservers for each join processed. It turns out that we were being a bit naive and processing each event individually, and sending out presence updates for every one of those joins. Even if many different joins were users on the same server (hello IRC bridges), we'd send presence to that same homeserver for every remote user join we saw. This PR attempts to deduplicate all of that by processing the entire batch of state deltas at once, instead of only doing each join individually. We process the joins and note down which servers need which presence: * If it was a local user join, send that user's latest presence to all servers in the room * If it was a remote user join, send the presence for all local users in the room to that homeserver We deduplicate by inserting all of those pending updates into a dictionary of the form: ``` { server_name1: {presence_update1, ...}, server_name2: {presence_update1, presence_update2, ...} } ``` Only after building this dict do we then start sending out presence updates. --- changelog.d/9402.bugfix | 1 + synapse/federation/sender/__init__.py | 2 +- synapse/handlers/presence.py | 56 ++++++++++++++++++++++++++--------- tests/handlers/test_presence.py | 14 ++++++--- 4 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 changelog.d/9402.bugfix diff --git a/changelog.d/9402.bugfix b/changelog.d/9402.bugfix new file mode 100644 index 0000000000..7729225ba2 --- /dev/null +++ b/changelog.d/9402.bugfix @@ -0,0 +1 @@ +Fix a bug where a lot of unnecessary presence updates were sent when joining a room. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 97fc4d0a82..24ebc4b803 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -474,7 +474,7 @@ class FederationSender: self._processing_pending_presence = False def send_presence_to_destinations( - self, states: List[UserPresenceState], destinations: List[str] + self, states: Iterable[UserPresenceState], destinations: Iterable[str] ) -> None: """Send the given presence states to the given destinations. destinations (list[str]) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index fb85b19770..b6a9ce4f38 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -849,6 +849,9 @@ class PresenceHandler(BasePresenceHandler): """Process current state deltas to find new joins that need to be handled. """ + # A map of destination to a set of user state that they should receive + presence_destinations = {} # type: Dict[str, Set[UserPresenceState]] + for delta in deltas: typ = delta["type"] state_key = delta["state_key"] @@ -858,6 +861,7 @@ class PresenceHandler(BasePresenceHandler): logger.debug("Handling: %r %r, %s", typ, state_key, event_id) + # Drop any event that isn't a membership join if typ != EventTypes.Member: continue @@ -880,13 +884,38 @@ class PresenceHandler(BasePresenceHandler): # Ignore changes to join events. continue - await self._on_user_joined_room(room_id, state_key) + # Retrieve any user presence state updates that need to be sent as a result, + # and the destinations that need to receive it + destinations, user_presence_states = await self._on_user_joined_room( + room_id, state_key + ) + + # Insert the destinations and respective updates into our destinations dict + for destination in destinations: + presence_destinations.setdefault(destination, set()).update( + user_presence_states + ) + + # Send out user presence updates for each destination + for destination, user_state_set in presence_destinations.items(): + self.federation.send_presence_to_destinations( + destinations=[destination], states=user_state_set + ) - async def _on_user_joined_room(self, room_id: str, user_id: str) -> None: + async def _on_user_joined_room( + self, room_id: str, user_id: str + ) -> Tuple[List[str], List[UserPresenceState]]: """Called when we detect a user joining the room via the current state - delta stream. - """ + delta stream. Returns the destinations that need to be updated and the + presence updates to send to them. + + Args: + room_id: The ID of the room that the user has joined. + user_id: The ID of the user that has joined the room. + Returns: + A tuple of destinations and presence updates to send to them. + """ if self.is_mine_id(user_id): # If this is a local user then we need to send their presence # out to hosts in the room (who don't already have it) @@ -894,15 +923,15 @@ class PresenceHandler(BasePresenceHandler): # TODO: We should be able to filter the hosts down to those that # haven't previously seen the user - state = await self.current_state_for_user(user_id) - hosts = await self.state.get_current_hosts_in_room(room_id) + remote_hosts = await self.state.get_current_hosts_in_room(room_id) # Filter out ourselves. - hosts = {host for host in hosts if host != self.server_name} + filtered_remote_hosts = [ + host for host in remote_hosts if host != self.server_name + ] - self.federation.send_presence_to_destinations( - states=[state], destinations=hosts - ) + state = await self.current_state_for_user(user_id) + return filtered_remote_hosts, [state] else: # A remote user has joined the room, so we need to: # 1. Check if this is a new server in the room @@ -915,6 +944,8 @@ class PresenceHandler(BasePresenceHandler): # TODO: Check that this is actually a new server joining the # room. + remote_host = get_domain_from_id(user_id) + users = await self.state.get_current_users_in_room(room_id) user_ids = list(filter(self.is_mine_id, users)) @@ -934,10 +965,7 @@ class PresenceHandler(BasePresenceHandler): or state.status_msg is not None ] - if states: - self.federation.send_presence_to_destinations( - states=states, destinations=[get_domain_from_id(user_id)] - ) + return [remote_host], states def should_notify(old_state, new_state): diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index be2ee26f07..996c614198 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -521,7 +521,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase): ) self.assertEqual(expected_state.state, PresenceState.ONLINE) self.federation_sender.send_presence_to_destinations.assert_called_once_with( - destinations=["server2"], states=[expected_state] + destinations=["server2"], states={expected_state} ) # @@ -533,7 +533,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase): self.federation_sender.send_presence.assert_not_called() self.federation_sender.send_presence_to_destinations.assert_called_once_with( - destinations=["server3"], states=[expected_state] + destinations=["server3"], states={expected_state} ) def test_remote_gets_presence_when_local_user_joins(self): @@ -584,8 +584,14 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase): self.presence_handler.current_state_for_user("@test2:server") ) self.assertEqual(expected_state.state, PresenceState.ONLINE) - self.federation_sender.send_presence_to_destinations.assert_called_once_with( - destinations={"server2", "server3"}, states=[expected_state] + self.assertEqual( + self.federation_sender.send_presence_to_destinations.call_count, 2 + ) + self.federation_sender.send_presence_to_destinations.assert_any_call( + destinations=["server3"], states={expected_state} + ) + self.federation_sender.send_presence_to_destinations.assert_any_call( + destinations=["server2"], states={expected_state} ) def _add_new_user(self, room_id, user_id): -- cgit 1.4.1 From b114a45f5ff515dd4887e33e808bb02cb109dfff Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 19 Feb 2021 07:48:46 -0500 Subject: Support not providing an IdP icon when choosing a username. (#9440) --- changelog.d/9440.bugfix | 1 + synapse/res/templates/sso_auth_account_details.html | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/9440.bugfix diff --git a/changelog.d/9440.bugfix b/changelog.d/9440.bugfix new file mode 100644 index 0000000000..47b9842b37 --- /dev/null +++ b/changelog.d/9440.bugfix @@ -0,0 +1 @@ +Fix bug introduced in v1.27.0 where allowing a user to choose their own username when logging in via single sign-on did not work unless an `idp_icon` was defined. diff --git a/synapse/res/templates/sso_auth_account_details.html b/synapse/res/templates/sso_auth_account_details.html index f4fdc40b22..00e1dcdbb8 100644 --- a/synapse/res/templates/sso_auth_account_details.html +++ b/synapse/res/templates/sso_auth_account_details.html @@ -145,7 +145,7 @@ {% if user_attributes.avatar_url or user_attributes.display_name or user_attributes.emails %}
-

Information from {{ idp.idp_name }}

+

{% if idp.idp_icon %}{% endif %}Information from {{ idp.idp_name }}

{% if user_attributes.avatar_url %}