From f455b0e4209cd581ea7b75436b178d2843cc6e0d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 20 Sep 2021 17:35:16 +0100 Subject: GHA: reintroduce an env var for `$GITHUB_HEAD_REF` (#10659) This should ensure GHA runs synapse against the same-named sytest branch --- .github/workflows/tests.yml | 1 + changelog.d/10659.misc | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/10659.misc diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8736699ad8..fa9c5e036a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -192,6 +192,7 @@ jobs: volumes: - ${{ github.workspace }}:/src env: + SYTEST_BRANCH: ${{ github.head_ref }} POSTGRES: ${{ matrix.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.postgres == 'multi-postgres') && 1}} WORKERS: ${{ matrix.workers && 1 }} diff --git a/changelog.d/10659.misc b/changelog.d/10659.misc new file mode 100644 index 0000000000..d677a521c3 --- /dev/null +++ b/changelog.d/10659.misc @@ -0,0 +1 @@ +Fix GitHub Actions config so we can run sytest on synapse from parallel branches. \ No newline at end of file -- cgit 1.5.1 From 6a751ff5e064bbb1fae2915e533031531c9d74e7 Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Tue, 21 Sep 2021 05:23:34 -0500 Subject: Allow sending a membership event to unban a user (#10807) * Allow membership event to unban user Signed-off-by: Aaron Raimist --- changelog.d/10807.bugfix | 1 + synapse/handlers/room_member.py | 2 +- tests/rest/client/test_rooms.py | 87 ++++++++++++++++++++++++++++++++++++++++- tests/rest/client/utils.py | 11 ++++++ 4 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10807.bugfix diff --git a/changelog.d/10807.bugfix b/changelog.d/10807.bugfix new file mode 100644 index 0000000000..be03f5c738 --- /dev/null +++ b/changelog.d/10807.bugfix @@ -0,0 +1 @@ +Allow sending a membership event to unban a user. Contributed by @aaronraimist. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a3e13c2270..7bb3f0bc47 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -668,7 +668,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): " (membership=%s)" % old_membership, errcode=Codes.BAD_STATE, ) - if old_membership == "ban" and action != "unban": + if old_membership == "ban" and action not in ["ban", "unban", "leave"]: raise SynapseError( 403, "Cannot %s user who was banned" % (action,), diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 50100a5ae4..5a01765f4d 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -26,7 +26,7 @@ from twisted.internet import defer import synapse.rest.admin from synapse.api.constants import EventContentFields, EventTypes, Membership -from synapse.api.errors import HttpResponseException +from synapse.api.errors import Codes, HttpResponseException from synapse.handlers.pagination import PurgeStatus from synapse.rest import admin from synapse.rest.client import account, directory, login, profile, room, sync @@ -377,6 +377,91 @@ class RoomPermissionsTestCase(RoomBase): expect_code=403, ) + # tests the "from banned" line from the table in https://spec.matrix.org/unstable/client-server-api/#mroommember + def test_member_event_from_ban(self): + room = self.created_rmid + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self.helper.join(room=room, user=self.user_id) + + other = "@burgundy:red" + + # User cannot ban other since they do not have required power level + self.helper.change_membership( + room=room, + src=self.user_id, + targ=other, + membership=Membership.BAN, + expect_code=403, # expect failure + expect_errcode=Codes.FORBIDDEN, + ) + + # Admin bans other + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.BAN, + expect_code=200, + ) + + # from ban to invite: Must never happen. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.INVITE, + expect_code=403, # expect failure + expect_errcode=Codes.BAD_STATE, + ) + + # from ban to join: Must never happen. + self.helper.change_membership( + room=room, + src=other, + targ=other, + membership=Membership.JOIN, + expect_code=403, # expect failure + expect_errcode=Codes.BAD_STATE, + ) + + # from ban to ban: No change. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.BAN, + expect_code=200, + ) + + # from ban to knock: Must never happen. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.KNOCK, + expect_code=403, # expect failure + expect_errcode=Codes.BAD_STATE, + ) + + # User cannot unban other since they do not have required power level + self.helper.change_membership( + room=room, + src=self.user_id, + targ=other, + membership=Membership.LEAVE, + expect_code=403, # expect failure + expect_errcode=Codes.FORBIDDEN, + ) + + # from ban to leave: User was unbanned. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.LEAVE, + expect_code=200, + ) + class RoomsMemberListTestCase(RoomBase): """Tests /rooms/$room_id/members/list REST events.""" diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index 954ad1a1fd..c56e45fc10 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -138,6 +138,7 @@ class RestHelper: extra_data: Optional[dict] = None, tok: Optional[str] = None, expect_code: int = 200, + expect_errcode: str = None, ) -> None: """ Send a membership state event into a room. @@ -150,6 +151,7 @@ class RestHelper: extra_data: Extra information to include in the content of the event tok: The user access token to use expect_code: The expected HTTP response code + expect_errcode: The expected Matrix error code """ temp_id = self.auth_user_id self.auth_user_id = src @@ -177,6 +179,15 @@ class RestHelper: channel.result["body"], ) + if expect_errcode: + assert ( + str(channel.json_body["errcode"]) == expect_errcode + ), "Expected: %r, got: %r, resp: %r" % ( + expect_errcode, + channel.json_body["errcode"], + channel.result["body"], + ) + self.auth_user_id = temp_id def send( -- cgit 1.5.1 From c4ef61136f4d0819785965f53ec9db607da8b907 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 11:49:15 +0100 Subject: 1.43.0 --- CHANGES.md | 6 ++++++ debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index ce3b0adae5..14248686be 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 1.43.0 (2021-09-21) +=========================== + +No significant changes. + + Synapse 1.43.0rc2 (2021-09-17) ============================== diff --git a/debian/changelog b/debian/changelog index 7774cad55b..4b07d04128 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.43.0) stable; urgency=medium + + * New synapse release 1.43.0. + + -- Synapse Packaging team Tue, 21 Sep 2021 11:49:05 +0100 + matrix-synapse-py3 (1.43.0~rc2) stable; urgency=medium * New synapse release 1.43.0~rc2. diff --git a/synapse/__init__.py b/synapse/__init__.py index c9ef90ccaa..5f5cff1dfd 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ try: except ImportError: pass -__version__ = "1.43.0rc2" +__version__ = "1.43.0" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when -- cgit 1.5.1 From 6c92ba3eaca0cc55e1ba54a64f7150e5dde49dd8 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 11:52:37 +0100 Subject: Move deprecation notice from 1.43 rc to release --- CHANGES.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 14248686be..80a14020d1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,9 @@ Synapse 1.43.0 (2021-09-21) =========================== -No significant changes. +This release drops support for the deprecated, unstable API for [MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), as well as the undocumented `experimental.msc2858_enabled` config option. Client authors should update their clients to use the stable API, available since Synapse 1.30. +No significant changes since 1.43.0rc2. Synapse 1.43.0rc2 (2021-09-17) ============================== @@ -16,8 +17,6 @@ Bugfixes Synapse 1.43.0rc1 (2021-09-14) ============================== -This release drops support for the deprecated, unstable API for [MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), as well as the undocumented `experimental.msc2858_enabled` config option. Client authors should update their clients to use the stable API, available since Synapse 1.30. - Features -------- -- cgit 1.5.1 From c17e698e1bcc75fdccc73db4cee0b65e6d1430c6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 12:01:54 +0100 Subject: Point to upgrade notes --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 80a14020d1..2878b67e72 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,8 @@ Synapse 1.43.0 (2021-09-21) This release drops support for the deprecated, unstable API for [MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), as well as the undocumented `experimental.msc2858_enabled` config option. Client authors should update their clients to use the stable API, available since Synapse 1.30. +The documentation has been updated with configuration for routing `/spaces`, `/heirarchy` and `/summary` to workers. See [the upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.43/docs/upgrade.md#upgrading-to-v1430) for more details. + No significant changes since 1.43.0rc2. Synapse 1.43.0rc2 (2021-09-17) -- cgit 1.5.1 From 9b5782d51d52b343dc4d633d7c0bf8a3b64f42c5 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 12:10:50 +0100 Subject: Specify MSC name; fix typo one day I'll learn how to spell hierarchy --- CHANGES.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2878b67e72..341281751f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,11 +1,11 @@ Synapse 1.43.0 (2021-09-21) =========================== -This release drops support for the deprecated, unstable API for [MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), as well as the undocumented `experimental.msc2858_enabled` config option. Client authors should update their clients to use the stable API, available since Synapse 1.30. +This release drops support for the deprecated, unstable API for [MSC2858 (Multiple SSO Identity Providers)](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), as well as the undocumented `experimental.msc2858_enabled` config option. Client authors should update their clients to use the stable API, available since Synapse 1.30. -The documentation has been updated with configuration for routing `/spaces`, `/heirarchy` and `/summary` to workers. See [the upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.43/docs/upgrade.md#upgrading-to-v1430) for more details. +The documentation has been updated with configuration for routing `/spaces`, `/hierarchy` and `/summary` to workers. See [the upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.43/docs/upgrade.md#upgrading-to-v1430) for more details. -No significant changes since 1.43.0rc2. +No significant changes sheirarchyince 1.43.0rc2. Synapse 1.43.0rc2 (2021-09-17) ============================== -- cgit 1.5.1 From 9ffa787eb243c98a6ca1ecd9eac4a6b5dac2bef0 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 12:24:47 +0100 Subject: Fix typo again --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 341281751f..652f4b7955 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,7 @@ This release drops support for the deprecated, unstable API for [MSC2858 (Multip The documentation has been updated with configuration for routing `/spaces`, `/hierarchy` and `/summary` to workers. See [the upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.43/docs/upgrade.md#upgrading-to-v1430) for more details. -No significant changes sheirarchyince 1.43.0rc2. +No significant changes since 1.43.0rc2. Synapse 1.43.0rc2 (2021-09-17) ============================== -- cgit 1.5.1 From 60453315bdbbbd364f13ca386de965e015f1062f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 13:02:34 +0100 Subject: Always add local users to the user directory (#10796) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a simplification, but one that'll help make the user directory logic easier to follow with the other changes upcoming. It's not strictly required for those changes, but this will help simplify the resulting logic that listens for `m.room.member` events and generally make the logic easier to follow. This means the config option `search_all_users` ends up controlling the search query only, and not the data we store. The cost of doing so is an extra row in the `user_directory` and `user_directory_search` tables for each local user which - belongs to no public rooms - belongs to no private rooms of size ≥ 2 I think the cost of this will be marginal (since they'll already have entries in `users` and `profiles` anyway). As a small upside, a homeserver whose directory was built with this change can toggle `search_all_users` without having to rebuild their directory. Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/10796.misc | 1 + docs/sample_config.yaml | 14 +++++++----- synapse/config/user_directory.py | 14 +++++++----- synapse/handlers/deactivate_account.py | 7 ++---- synapse/handlers/profile.py | 18 +++++++--------- synapse/handlers/register.py | 9 ++++---- synapse/storage/databases/main/user_directory.py | 27 +++++++++--------------- tests/handlers/test_profile.py | 7 ++++-- tests/rest/client/test_rooms.py | 12 +++++------ 9 files changed, 54 insertions(+), 55 deletions(-) create mode 100644 changelog.d/10796.misc diff --git a/changelog.d/10796.misc b/changelog.d/10796.misc new file mode 100644 index 0000000000..1873b2386a --- /dev/null +++ b/changelog.d/10796.misc @@ -0,0 +1 @@ +Simplify the internal logic which maintains the user directory database tables. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 95cca16552..166cec38d3 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2362,12 +2362,16 @@ user_directory: #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. + # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild following the instructions at + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index b10df8a232..2552f688d0 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -45,12 +45,16 @@ class UserDirectoryConfig(Config): #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. + # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild following the instructions at + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index dcd320c555..a03ff9842b 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -257,11 +257,8 @@ class DeactivateAccountHandler(BaseHandler): """ # Add the user to the directory, if necessary. user = UserID.from_string(user_id) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(user.localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(user.localpart) + await self.user_directory_handler.handle_local_profile_change(user_id, profile) # Ensure the user is not marked as erased. await self.store.mark_user_not_erased(user_id) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 246eb98282..f06070bfcf 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -214,11 +214,10 @@ class ProfileHandler(BaseHandler): target_user.localpart, displayname_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) @@ -300,11 +299,10 @@ class ProfileHandler(BaseHandler): target_user.localpart, avatar_url_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index efb7d26760..1c195c65db 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -295,11 +295,10 @@ class RegistrationHandler(BaseHandler): shadow_banned=shadow_banned, ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(localpart) + await self.user_directory_handler.handle_local_profile_change( + user_id, profile + ) else: # autogen a sequential user ID diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 8aebdc2817..718f3e9976 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -85,19 +85,17 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) del rooms - # If search all users is on, get all the users we want to add. - if self.hs.config.user_directory_search_all_users: - sql = ( - "CREATE TABLE IF NOT EXISTS " - + TEMP_TABLE - + "_users(user_id TEXT NOT NULL)" - ) - txn.execute(sql) + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) - txn.execute("SELECT name FROM users") - users = [{"user_id": x[0]} for x in txn.fetchall()] + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] - self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) new_pos = await self.get_max_stream_id_in_current_state_deltas() await self.db_pool.runInteraction( @@ -265,13 +263,8 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): async def _populate_user_directory_process_users(self, progress, batch_size): """ - If search_all_users is enabled, add all of the users to the user directory. + Add all local users to the user directory. """ - if not self.hs.config.user_directory_search_all_users: - await self.db_pool.updates._end_background_update( - "populate_user_directory_process_users" - ) - return 1 def _get_next_batch(txn): sql = "SELECT user_id FROM %s LIMIT %s" % ( diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 2928c4f48c..57cc3e2646 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -16,6 +16,7 @@ from unittest.mock import Mock import synapse.types from synapse.api.errors import AuthError, SynapseError +from synapse.rest import admin from synapse.types import UserID from tests import unittest @@ -25,6 +26,8 @@ from tests.test_utils import make_awaitable class ProfileTestCase(unittest.HomeserverTestCase): """Tests profile management.""" + servlets = [admin.register_servlets] + def make_homeserver(self, reactor, clock): self.mock_federation = Mock() self.mock_registry = Mock() @@ -46,11 +49,11 @@ class ProfileTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() - self.frank = UserID.from_string("@1234ABCD:test") + self.frank = UserID.from_string("@1234abcd:test") self.bob = UserID.from_string("@4567:test") self.alice = UserID.from_string("@alice:remote") - self.get_success(self.store.create_profile(self.frank.localpart)) + self.get_success(self.register_user(self.frank.localpart, "frankpassword")) self.handler = hs.get_profile_handler() diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 5a01765f4d..ef847f0f5f 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -869,6 +869,12 @@ class RoomJoinRatelimitTestCase(RoomBase): room.register_servlets, ] + def prepare(self, reactor, clock, homeserver): + super().prepare(reactor, clock, homeserver) + # profile changes expect that the user is actually registered + user = UserID.from_string(self.user_id) + self.get_success(self.register_user(user.localpart, "supersecretpassword")) + @unittest.override_config( {"rc_joins": {"local": {"per_second": 0.5, "burst_count": 3}}} ) @@ -898,12 +904,6 @@ class RoomJoinRatelimitTestCase(RoomBase): # join in a second. room_ids.append(self.helper.create_room_as(self.user_id)) - # Create a profile for the user, since it hasn't been done on registration. - store = self.hs.get_datastore() - self.get_success( - store.create_profile(UserID.from_string(self.user_id).localpart) - ) - # Update the display name for the user. path = "/_matrix/client/r0/profile/%s/displayname" % self.user_id channel = self.make_request("PUT", path, {"displayname": "John Doe"}) -- cgit 1.5.1 From ee557b5375e376e5664f6a3e372c946f7a754f75 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 21 Sep 2021 08:10:01 -0500 Subject: Rename `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id` (MSC2716) (#10839) As mentioned in https://github.com/matrix-org/matrix-doc/pull/2716#discussion_r705872887 and https://github.com/matrix-org/synapse/issues/10737 --- changelog.d/10839.misc | 1 + synapse/rest/client/room_batch.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 changelog.d/10839.misc diff --git a/changelog.d/10839.misc b/changelog.d/10839.misc new file mode 100644 index 0000000000..d0e10f31d5 --- /dev/null +++ b/changelog.d/10839.misc @@ -0,0 +1 @@ +Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id`. diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index d466edeec2..f73ccc7f65 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -61,7 +61,7 @@ class RoomBatchSendEventRestServlet(RestServlet): some messages, you can only insert older ones after that. tldr; Insert chunks from your most recent history -> oldest history. - POST /_matrix/client/unstable/org.matrix.msc2716/rooms//batch_send?prev_event=&chunk_id= + POST /_matrix/client/unstable/org.matrix.msc2716/rooms//batch_send?prev_event_id=&chunk_id= { "events": [ ... ], "state_events_at_start": [ ... ] @@ -188,24 +188,26 @@ class RoomBatchSendEventRestServlet(RestServlet): assert_params_in_dict(body, ["state_events_at_start", "events"]) assert request.args is not None - prev_events_from_query = parse_strings_from_args(request.args, "prev_event") + prev_event_ids_from_query = parse_strings_from_args( + request.args, "prev_event_id" + ) chunk_id_from_query = parse_string(request, "chunk_id") - if prev_events_from_query is None: + if prev_event_ids_from_query is None: raise SynapseError( HTTPStatus.BAD_REQUEST, "prev_event query parameter is required when inserting historical messages back in time", errcode=Codes.MISSING_PARAM, ) - # For the event we are inserting next to (`prev_events_from_query`), + # For the event we are inserting next to (`prev_event_ids_from_query`), # find the most recent auth events (derived from state events) that # allowed that message to be sent. We will use that as a base # to auth our historical messages against. ( most_recent_prev_event_id, _, - ) = await self.store.get_max_depth_of(prev_events_from_query) + ) = await self.store.get_max_depth_of(prev_event_ids_from_query) # mapping from (type, state_key) -> state_event_id prev_state_map = await self.state_store.get_state_ids_for_event( most_recent_prev_event_id @@ -286,7 +288,7 @@ class RoomBatchSendEventRestServlet(RestServlet): events_to_create = body["events"] inherited_depth = await self._inherit_depth_from_prev_ids( - prev_events_from_query + prev_event_ids_from_query ) # Figure out which chunk to connect to. If they passed in @@ -321,7 +323,7 @@ class RoomBatchSendEventRestServlet(RestServlet): # an insertion event), in which case we just create a new insertion event # that can then get pointed to by a "marker" event later. else: - prev_event_ids = prev_events_from_query + prev_event_ids = prev_event_ids_from_query base_insertion_event_dict = self._create_insertion_event_dict( sender=requester.user.to_string(), -- cgit 1.5.1 From 5fca3c8ae62c66a1777bcb85c98679669369b061 Mon Sep 17 00:00:00 2001 From: Hillery Shay Date: Tue, 21 Sep 2021 08:04:35 -0700 Subject: Allow Synapse Admin API's Room Search to accept non-ASCII characters (#10859) * add tests for checking if room search works with non-ascii char * change encoding on parse_string to UTF-8 * lints * properly encode search term * lints * add changelog file * update changelog number * set changelog entry filetype to .bugfix * Revert "set changelog entry filetype to .bugfix" This reverts commit be8e5a314251438ec4ec7dbc59ba32162c93e550. * update changelog message and file type * change parse_string default encoding back to ascii and update room search admin api calll to parse string * refactor tests * Update tests/rest/admin/test_room.py Co-authored-by: Patrick Cloke Co-authored-by: Patrick Cloke --- changelog.d/10859.bugfix | 1 + synapse/rest/admin/rooms.py | 2 +- tests/rest/admin/test_room.py | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10859.bugfix diff --git a/changelog.d/10859.bugfix b/changelog.d/10859.bugfix new file mode 100644 index 0000000000..c1bfe22d54 --- /dev/null +++ b/changelog.d/10859.bugfix @@ -0,0 +1 @@ +Fix a bug in Unicode support of the room search admin API. It is now possible to search for rooms with non-ASCII characters. \ No newline at end of file diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index ad83d4b54c..8f781f745f 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -125,7 +125,7 @@ class ListRoomRestServlet(RestServlet): errcode=Codes.INVALID_PARAM, ) - search_term = parse_string(request, "search_term") + search_term = parse_string(request, "search_term", encoding="utf-8") if search_term == "": raise SynapseError( 400, diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 40e032df7f..e798513ac1 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -941,6 +941,33 @@ class RoomTestCase(unittest.HomeserverTestCase): _search_test(None, "bar") _search_test(None, "", expected_http_code=400) + def test_search_term_non_ascii(self): + """Test that searching for a room with non-ASCII characters works correctly""" + + # Create test room + room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + room_name = "ж" + + # Set the name for the room + self.helper.send_state( + room_id, + "m.room.name", + {"name": room_name}, + tok=self.admin_user_tok, + ) + + # make the request and test that the response is what we wanted + search_term = urllib.parse.quote("ж", "utf-8") + url = "/_synapse/admin/v1/rooms?search_term=%s" % (search_term,) + channel = self.make_request( + "GET", + url.encode("ascii"), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(room_id, channel.json_body.get("rooms")[0].get("room_id")) + self.assertEqual("ж", channel.json_body.get("rooms")[0].get("name")) + def test_single_room(self): """Test that a single room can be requested correctly""" # Create two test rooms -- cgit 1.5.1 From 2843058a8b5456dd63b83ad39a992d5d1a285eb6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 21 Sep 2021 17:40:20 +0200 Subject: Test that state events sent by modules correctly end up in the room's state (#10835) Test for #10830 Ideally the test would also make sure the new state event comes down sync, but this is probably good enough. --- changelog.d/10835.misc | 1 + tests/rest/client/test_third_party_rules.py | 84 +++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 changelog.d/10835.misc diff --git a/changelog.d/10835.misc b/changelog.d/10835.misc new file mode 100644 index 0000000000..0c3d13477e --- /dev/null +++ b/changelog.d/10835.misc @@ -0,0 +1 @@ +Add a test to ensure state events sent by modules get persisted correctly. diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 0ae4029640..38ac9be113 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -15,6 +15,7 @@ import threading from typing import Dict from unittest.mock import Mock +from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.module_api import ModuleApi @@ -327,3 +328,86 @@ class ThirdPartyRulesTestCase(unittest.HomeserverTestCase): correctly. """ self.helper.create_room_as(self.user_id, tok=self.tok, expect_code=403) + + def test_sent_event_end_up_in_room_state(self): + """Tests that a state event sent by a module while processing another state event + doesn't get dropped from the state of the room. This is to guard against a bug + where Synapse has been observed doing so, see https://github.com/matrix-org/synapse/issues/10830 + """ + event_type = "org.matrix.test_state" + + # This content will be updated later on, and since we actually use a reference on + # the dict it does the right thing. It's a bit hacky but a handy way of making + # sure the state actually gets updated. + event_content = {"i": -1} + + api = self.hs.get_module_api() + + # Define a callback that sends a custom event on power levels update. + async def test_fn(event: EventBase, state_events): + if event.is_state and event.type == EventTypes.PowerLevels: + await api.create_and_send_event_into_room( + { + "room_id": event.room_id, + "sender": event.sender, + "type": event_type, + "content": event_content, + "state_key": "", + } + ) + return True, None + + self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [test_fn] + + # Sometimes the bug might not happen the first time the event type is added + # to the state but might happen when an event updates the state of the room for + # that type, so we test updating the state several times. + for i in range(5): + # Update the content of the custom state event to be sent by the callback. + event_content["i"] = i + + # Update the room's power levels with a different value each time so Synapse + # doesn't consider an update redundant. + self._update_power_levels(event_default=i) + + # Check that the new event made it to the room's state. + channel = self.make_request( + method="GET", + path="/rooms/" + self.room_id + "/state/" + event_type, + access_token=self.tok, + ) + + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["i"], i) + + def _update_power_levels(self, event_default: int = 0): + """Updates the room's power levels. + + Args: + event_default: Value to use for 'events_default'. + """ + self.helper.send_state( + room_id=self.room_id, + event_type=EventTypes.PowerLevels, + body={ + "ban": 50, + "events": { + "m.room.avatar": 50, + "m.room.canonical_alias": 50, + "m.room.encryption": 100, + "m.room.history_visibility": 100, + "m.room.name": 50, + "m.room.power_levels": 100, + "m.room.server_acl": 100, + "m.room.tombstone": 100, + }, + "events_default": event_default, + "invite": 0, + "kick": 50, + "redact": 50, + "state_default": 50, + "users": {self.user_id: 100}, + "users_default": 0, + }, + tok=self.tok, + ) -- cgit 1.5.1 From ba7a91aea5fd624bf048f0fda0dca80da7a1945e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 21 Sep 2021 12:09:57 -0400 Subject: Refactor oEmbed previews (#10814) The major change is moving the decision of whether to use oEmbed further up the call-stack. This reverts the _download_url method to being a "dumb" functionwhich takes a single URL and downloads it (as it was before #7920). This also makes more minor refactorings: * Renames internal variables for clarity. * Factors out shared code between the HTML and rich oEmbed previews. * Fixes tests to preview an oEmbed image. --- changelog.d/10814.feature | 1 + docs/development/url_previews.md | 21 +- synapse/rest/media/v1/oembed.py | 145 +++++++----- synapse/rest/media/v1/preview_url_resource.py | 326 +++++++++++++++----------- tests/rest/media/v1/test_url_preview.py | 26 +- 5 files changed, 299 insertions(+), 220 deletions(-) create mode 100644 changelog.d/10814.feature diff --git a/changelog.d/10814.feature b/changelog.d/10814.feature new file mode 100644 index 0000000000..4fa95a6cc9 --- /dev/null +++ b/changelog.d/10814.feature @@ -0,0 +1 @@ +Improve oEmbed previews by processing the author name, photo, and video information. diff --git a/docs/development/url_previews.md b/docs/development/url_previews.md index bbe05e281c..aff3813609 100644 --- a/docs/development/url_previews.md +++ b/docs/development/url_previews.md @@ -25,16 +25,14 @@ When Synapse is asked to preview a URL it does the following: 3. Kicks off a background process to generate a preview: 1. Checks the database cache by URL and timestamp and returns the result if it has not expired and was successful (a 2xx return code). - 2. Checks if the URL matches an oEmbed pattern. If it does, fetch the oEmbed - response. If this is an image, replace the URL to fetch and continue. If - if it is HTML content, use the HTML as the document and continue. - 3. If it doesn't match an oEmbed pattern, downloads the URL and stores it - into a file via the media storage provider and saves the local media - metadata. - 5. If the media is an image: + 2. Checks if the URL matches an [oEmbed](https://oembed.com/) pattern. If it + does, update the URL to download. + 3. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 4. If the media is an image: 1. Generates thumbnails. 2. Generates an Open Graph response based on image properties. - 6. If the media is HTML: + 5. If the media is HTML: 1. Decodes the HTML via the stored file. 2. Generates an Open Graph response from the HTML. 3. If an image exists in the Open Graph response: @@ -42,6 +40,13 @@ When Synapse is asked to preview a URL it does the following: provider and saves the local media metadata. 2. Generates thumbnails. 3. Updates the Open Graph response based on image properties. + 6. If the media is JSON and an oEmbed URL was found: + 1. Convert the oEmbed response to an Open Graph response. + 2. If a thumbnail or image is in the oEmbed response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. 7. Stores the result in the database cache. 4. Returns the result. diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 2e6706dbfa..8b74e72655 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import urllib.parse from typing import TYPE_CHECKING, Optional import attr from synapse.http.client import SimpleHttpClient +from synapse.types import JsonDict +from synapse.util import json_decoder if TYPE_CHECKING: from synapse.server import HomeServer @@ -24,18 +27,15 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class OEmbedResult: - # Either HTML content or URL must be provided. - html: Optional[str] - url: Optional[str] - title: Optional[str] - # Number of seconds to cache the content. - cache_age: int - - -class OEmbedError(Exception): - """An error occurred processing the oEmbed object.""" + # The Open Graph result (converted from the oEmbed result). + open_graph_result: JsonDict + # Number of seconds to cache the content, according to the oEmbed response. + # + # This will be None if no cache-age is provided in the oEmbed response (or + # if the oEmbed response cannot be turned into an Open Graph response). + cache_age: Optional[int] class OEmbedProvider: @@ -81,75 +81,106 @@ class OEmbedProvider: """ for url_pattern, endpoint in self._oembed_patterns.items(): if url_pattern.fullmatch(url): - return endpoint + # TODO Specify max height / width. + + # Note that only the JSON format is supported, some endpoints want + # this in the URL, others want it as an argument. + endpoint = endpoint.replace("{format}", "json") + + args = {"url": url, "format": "json"} + query_str = urllib.parse.urlencode(args, True) + return f"{endpoint}?{query_str}" # No match. return None - async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult: + def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: """ - Request content from an oEmbed endpoint. + Parse the oEmbed response into an Open Graph response. Args: - endpoint: The oEmbed API endpoint. - url: The URL to pass to the API. + url: The URL which is being previewed (not the one which was + requested). + raw_body: The oEmbed response as JSON encoded as bytes. Returns: - An object representing the metadata returned. - - Raises: - OEmbedError if fetching or parsing of the oEmbed information fails. + json-encoded Open Graph data """ - try: - logger.debug("Trying to get oEmbed content for url '%s'", url) - # Note that only the JSON format is supported, some endpoints want - # this in the URL, others want it as an argument. - endpoint = endpoint.replace("{format}", "json") - - result = await self._client.get_json( - endpoint, - # TODO Specify max height / width. - args={"url": url, "format": "json"}, - ) + try: + # oEmbed responses *must* be UTF-8 according to the spec. + oembed = json_decoder.decode(raw_body.decode("utf-8")) # Ensure there's a version of 1.0. - if result.get("version") != "1.0": - raise OEmbedError("Invalid version: %s" % (result.get("version"),)) - - oembed_type = result.get("type") + oembed_version = oembed["version"] + if oembed_version != "1.0": + raise RuntimeError(f"Invalid version: {oembed_version}") # Ensure the cache age is None or an int. - cache_age = result.get("cache_age") + cache_age = oembed.get("cache_age") if cache_age: cache_age = int(cache_age) - oembed_result = OEmbedResult(None, None, result.get("title"), cache_age) + # The results. + open_graph_response = {"og:title": oembed.get("title")} - # HTML content. + # If a thumbnail exists, use it. Note that dimensions will be calculated later. + if "thumbnail_url" in oembed: + open_graph_response["og:image"] = oembed["thumbnail_url"] + + # Process each type separately. + oembed_type = oembed["type"] if oembed_type == "rich": - oembed_result.html = result.get("html") - return oembed_result + calc_description_and_urls(open_graph_response, oembed["html"]) - if oembed_type == "photo": - oembed_result.url = result.get("url") - return oembed_result + elif oembed_type == "photo": + # If this is a photo, use the full image, not the thumbnail. + open_graph_response["og:image"] = oembed["url"] - # TODO Handle link and video types. + else: + raise RuntimeError(f"Unknown oEmbed type: {oembed_type}") - if "thumbnail_url" in result: - oembed_result.url = result.get("thumbnail_url") - return oembed_result + except Exception as e: + # Trap any exception and let the code follow as usual. + logger.warning(f"Error parsing oEmbed metadata from {url}: {e:r}") + open_graph_response = {} + cache_age = None - raise OEmbedError("Incompatible oEmbed information.") + return OEmbedResult(open_graph_response, cache_age) - except OEmbedError as e: - # Trap OEmbedErrors first so we can directly re-raise them. - logger.warning("Error parsing oEmbed metadata from %s: %r", url, e) - raise - except Exception as e: - # Trap any exception and let the code follow as usual. - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading oEmbed metadata from %s: %r", url, e) - raise OEmbedError() from e +def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: + """ + Calculate description for an HTML document. + + This uses lxml to convert the HTML document into plaintext. If errors + occur during processing of the document, an empty response is returned. + + Args: + open_graph_response: The current Open Graph summary. This is updated with additional fields. + html_body: The HTML document, as bytes. + + Returns: + The summary + """ + # If there's no body, nothing useful is going to be found. + if not html_body: + return + + from lxml import etree + + # Create an HTML parser. If this fails, log and return no metadata. + parser = etree.HTMLParser(recover=True, encoding="utf-8") + + # Attempt to parse the body. If this fails, log and return no metadata. + tree = etree.fromstring(html_body, parser) + + # The data was successfully parsed, but no tree was found. + if tree is None: + return + + from synapse.rest.media.v1.preview_url_resource import _calc_description + + description = _calc_description(tree) + if description: + open_graph_response["og:description"] = description diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index fe0627d9b0..0a0b476d2b 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -44,7 +44,7 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.media.v1._base import get_filename_from_headers from synapse.rest.media.v1.media_storage import MediaStorage -from synapse.rest.media.v1.oembed import OEmbedError, OEmbedProvider +from synapse.rest.media.v1.oembed import OEmbedProvider from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.async_helpers import ObservableDeferred @@ -73,6 +73,7 @@ OG_TAG_NAME_MAXLEN = 50 OG_TAG_VALUE_MAXLEN = 1000 ONE_HOUR = 60 * 60 * 1000 +ONE_DAY = 24 * ONE_HOUR @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -255,10 +256,19 @@ class PreviewUrlResource(DirectServeJsonResource): og = og.encode("utf8") return og - media_info = await self._download_url(url, user) + # If this URL can be accessed via oEmbed, use that instead. + url_to_download = url + oembed_url = self._oembed.get_oembed_url(url) + if oembed_url: + url_to_download = oembed_url + + media_info = await self._download_url(url_to_download, user) logger.debug("got media_info of '%s'", media_info) + # The number of milliseconds that the response should be considered valid. + expiration_ms = media_info.expires + if _is_media(media_info.media_type): file_id = media_info.filesystem_id dims = await self.media_repo._generate_thumbnails( @@ -288,34 +298,22 @@ class PreviewUrlResource(DirectServeJsonResource): encoding = get_html_media_encoding(body, media_info.media_type) og = decode_and_calc_og(body, media_info.uri, encoding) - # pre-cache the image for posterity - # FIXME: it might be cleaner to use the same flow as the main /preview_url - # request itself and benefit from the same caching etc. But for now we - # just rely on the caching on the master request to speed things up. - if "og:image" in og and og["og:image"]: - image_info = await self._download_url( - _rebase_url(og["og:image"], media_info.uri), user - ) + await self._precache_image_url(user, media_info, og) + + elif oembed_url and _is_json(media_info.media_type): + # Handle an oEmbed response. + with open(media_info.filename, "rb") as file: + body = file.read() + + oembed_response = self._oembed.parse_oembed_response(media_info.uri, body) + og = oembed_response.open_graph_result + + # Use the cache age from the oEmbed result, instead of the HTTP response. + if oembed_response.cache_age is not None: + expiration_ms = oembed_response.cache_age + + await self._precache_image_url(user, media_info, og) - if _is_media(image_info.media_type): - # TODO: make sure we don't choke on white-on-transparent images - file_id = image_info.filesystem_id - dims = await self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info.media_type, url_cache=True - ) - if dims: - og["og:image:width"] = dims["width"] - og["og:image:height"] = dims["height"] - else: - logger.warning("Couldn't get dims for %s", og["og:image"]) - - og[ - "og:image" - ] = f"mxc://{self.server_name}/{image_info.filesystem_id}" - og["og:image:type"] = image_info.media_type - og["matrix:image:size"] = image_info.media_length - else: - del og["og:image"] else: logger.warning("Failed to find any OG data in %s", url) og = {} @@ -336,12 +334,15 @@ class PreviewUrlResource(DirectServeJsonResource): jsonog = json_encoder.encode(og) + # Cap the amount of time to consider a response valid. + expiration_ms = min(expiration_ms, ONE_DAY) + # store OG in history-aware DB cache await self.store.store_url_cache( url, media_info.response_code, media_info.etag, - media_info.expires + media_info.created_ts_ms, + media_info.created_ts_ms + expiration_ms, jsonog, media_info.filesystem_id, media_info.created_ts_ms, @@ -358,88 +359,52 @@ class PreviewUrlResource(DirectServeJsonResource): file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True) - # If this URL can be accessed via oEmbed, use that instead. - url_to_download: Optional[str] = url - oembed_url = self._oembed.get_oembed_url(url) - if oembed_url: - # The result might be a new URL to download, or it might be HTML content. + with self.media_storage.store_into_file(file_info) as (f, fname, finish): try: - oembed_result = await self._oembed.get_oembed_content(oembed_url, url) - if oembed_result.url: - url_to_download = oembed_result.url - elif oembed_result.html: - url_to_download = None - except OEmbedError: - # If an error occurs, try doing a normal preview. - pass + logger.debug("Trying to get preview for url '%s'", url) + length, headers, uri, code = await self.client.get_file( + url, + output_stream=f, + max_size=self.max_spider_size, + headers={"Accept-Language": self.url_preview_accept_language}, + ) + except SynapseError: + # Pass SynapseErrors through directly, so that the servlet + # handler will return a SynapseError to the client instead of + # blank data or a 500. + raise + except DNSLookupError: + # DNS lookup returned no results + # Note: This will also be the case if one of the resolved IP + # addresses is blacklisted + raise SynapseError( + 502, + "DNS resolution failure during URL preview generation", + Codes.UNKNOWN, + ) + except Exception as e: + # FIXME: pass through 404s and other error messages nicely + logger.warning("Error downloading %s: %r", url, e) - if url_to_download: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - try: - logger.debug("Trying to get preview for url '%s'", url_to_download) - length, headers, uri, code = await self.client.get_file( - url_to_download, - output_stream=f, - max_size=self.max_spider_size, - headers={"Accept-Language": self.url_preview_accept_language}, - ) - except SynapseError: - # Pass SynapseErrors through directly, so that the servlet - # handler will return a SynapseError to the client instead of - # blank data or a 500. - raise - except DNSLookupError: - # DNS lookup returned no results - # Note: This will also be the case if one of the resolved IP - # addresses is blacklisted - raise SynapseError( - 502, - "DNS resolution failure during URL preview generation", - Codes.UNKNOWN, - ) - except Exception as e: - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading %s: %r", url_to_download, e) - - raise SynapseError( - 500, - "Failed to download content: %s" - % (traceback.format_exception_only(sys.exc_info()[0], e),), - Codes.UNKNOWN, - ) - await finish() - - if b"Content-Type" in headers: - media_type = headers[b"Content-Type"][0].decode("ascii") - else: - media_type = "application/octet-stream" + raise SynapseError( + 500, + "Failed to download content: %s" + % (traceback.format_exception_only(sys.exc_info()[0], e),), + Codes.UNKNOWN, + ) + await finish() - download_name = get_filename_from_headers(headers) + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" - # FIXME: we should calculate a proper expiration based on the - # Cache-Control and Expire headers. But for now, assume 1 hour. - expires = ONE_HOUR - etag = ( - headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None - ) - else: - # we can only get here if we did an oembed request and have an oembed_result.html - assert oembed_result.html is not None - assert oembed_url is not None - - html_bytes = oembed_result.html.encode("utf-8") - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - f.write(html_bytes) - await finish() - - media_type = "text/html" - download_name = oembed_result.title - length = len(html_bytes) - # If a specific cache age was not given, assume 1 hour. - expires = oembed_result.cache_age or ONE_HOUR - uri = oembed_url - code = 200 - etag = None + download_name = get_filename_from_headers(headers) + + # FIXME: we should calculate a proper expiration based on the + # Cache-Control and Expire headers. But for now, assume 1 hour. + expires = ONE_HOUR + etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None try: time_now_ms = self.clock.time_msec() @@ -474,6 +439,46 @@ class PreviewUrlResource(DirectServeJsonResource): etag=etag, ) + async def _precache_image_url( + self, user: str, media_info: MediaInfo, og: JsonDict + ) -> None: + """ + Pre-cache the image (if one exists) for posterity + + Args: + user: The user requesting the preview. + media_info: The media being previewed. + og: The Open Graph dictionary. This is modified with image information. + """ + # If there's no image or it is blank, there's nothing to do. + if "og:image" not in og or not og["og:image"]: + return + + # FIXME: it might be cleaner to use the same flow as the main /preview_url + # request itself and benefit from the same caching etc. But for now we + # just rely on the caching on the master request to speed things up. + image_info = await self._download_url( + _rebase_url(og["og:image"], media_info.uri), user + ) + + if _is_media(image_info.media_type): + # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info.filesystem_id + dims = await self.media_repo._generate_thumbnails( + None, file_id, file_id, image_info.media_type, url_cache=True + ) + if dims: + og["og:image:width"] = dims["width"] + og["og:image:height"] = dims["height"] + else: + logger.warning("Couldn't get dims for %s", og["og:image"]) + + og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}" + og["og:image:type"] = image_info.media_type + og["matrix:image:size"] = image_info.media_length + else: + del og["og:image"] + def _start_expire_url_cache_data(self) -> Deferred: return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data @@ -527,7 +532,7 @@ class PreviewUrlResource(DirectServeJsonResource): # These may be cached for a bit on the client (i.e., they # may have a room open with a preview url thing open). # So we wait a couple of days before deleting, just in case. - expire_before = now - 2 * 24 * ONE_HOUR + expire_before = now - 2 * ONE_DAY media_ids = await self.store.get_url_cache_media_before(expire_before) removed_media = [] @@ -669,7 +674,18 @@ def decode_and_calc_og( def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: - # suck our tree into lxml and define our OG response. + """ + Calculate metadata for an HTML document. + + This uses lxml to search the HTML document for Open Graph data. + + Args: + tree: The parsed HTML document. + media_url: The URI used to download the body. + + Returns: + The Open Graph response as a dictionary. + """ # if we see any image URLs in the OG response, then spider them # (although the client could choose to do this by asking for previews of those @@ -743,35 +759,7 @@ def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: if meta_description: og["og:description"] = meta_description[0] else: - # grab any text nodes which are inside the tag... - # unless they are within an HTML5 semantic markup tag... - #
,