diff options
author | Jason Little <realtyem@gmail.com> | 2023-03-03 05:54:53 -0600 |
---|---|---|
committer | Jason Little <realtyem@gmail.com> | 2023-03-03 05:54:53 -0600 |
commit | 5dc33182994860e2a2ddf3cf75d51087a0b31b15 (patch) | |
tree | 3cb15b8da24c9800c8dcddb3d7b5a57b9ba02eaa | |
parent | Apply changes from review. (diff) | |
parent | Experimental MSC3890 Implementation: Fix deleting account data when using an ... (diff) | |
download | synapse-5dc33182994860e2a2ddf3cf75d51087a0b31b15.tar.xz |
Merge branch 'develop' into comp-worker-shorthand
147 files changed, 3428 insertions, 2223 deletions
diff --git a/.ci/scripts/calculate_jobs.py b/.ci/scripts/calculate_jobs.py index 0cdc20e19c..b41ec0b6e2 100755 --- a/.ci/scripts/calculate_jobs.py +++ b/.ci/scripts/calculate_jobs.py @@ -109,12 +109,27 @@ sytest_tests = [ "postgres": "multi-postgres", "workers": "workers", }, + { + "sytest-tag": "focal", + "postgres": "multi-postgres", + "workers": "workers", + "reactor": "asyncio", + }, ] if not IS_PR: sytest_tests.extend( [ { + "sytest-tag": "focal", + "reactor": "asyncio", + }, + { + "sytest-tag": "focal", + "postgres": "postgres", + "reactor": "asyncio", + }, + { "sytest-tag": "testing", "postgres": "postgres", }, diff --git a/.github/workflows/docs-pr-netlify.yaml b/.github/workflows/docs-pr-netlify.yaml index 1704b3ce93..a5e74eb297 100644 --- a/.github/workflows/docs-pr-netlify.yaml +++ b/.github/workflows/docs-pr-netlify.yaml @@ -14,7 +14,7 @@ jobs: # There's a 'download artifact' action, but it hasn't been updated for the workflow_run action # (https://github.com/actions/download-artifact/issues/60) so instead we get this mess: - name: 📥 Download artifact - uses: dawidd6/action-download-artifact@b59d8c6a6c5c6c6437954f470d963c0b20ea7415 # v2.25.0 + uses: dawidd6/action-download-artifact@5e780fc7bbd0cac69fc73271ed86edf5dcb72d67 # v2.26.0 with: workflow: docs-pr.yaml run_id: ${{ github.event.workflow_run.id }} diff --git a/.github/workflows/docs-pr.yaml b/.github/workflows/docs-pr.yaml index d41f6c4490..6634f2644e 100644 --- a/.github/workflows/docs-pr.yaml +++ b/.github/workflows/docs-pr.yaml @@ -12,7 +12,7 @@ jobs: name: GitHub Pages runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup mdbook uses: peaceiris/actions-mdbook@adeb05db28a0c0004681db83893d56c0388ea9ea # v1.2.0 @@ -39,7 +39,7 @@ jobs: name: Check links in documentation runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup mdbook uses: peaceiris/actions-mdbook@adeb05db28a0c0004681db83893d56c0388ea9ea # v1.2.0 diff --git a/.github/workflows/push_complement_image.yml b/.github/workflows/push_complement_image.yml index f26143de6b..b76c4cb323 100644 --- a/.github/workflows/push_complement_image.yml +++ b/.github/workflows/push_complement_image.yml @@ -48,7 +48,7 @@ jobs: with: ref: master - name: Login to registry - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ github.actor }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cfafeaadc9..48a33c2f49 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -368,6 +368,7 @@ jobs: SYTEST_BRANCH: ${{ github.head_ref }} POSTGRES: ${{ matrix.job.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.job.postgres == 'multi-postgres') && 1}} + ASYNCIO_REACTOR: ${{ (matrix.job.reactor == 'asyncio') && 1 }} WORKERS: ${{ matrix.job.workers && 1 }} BLACKLIST: ${{ matrix.job.workers && 'synapse-blacklist-with-workers' }} TOP: ${{ github.workspace }} diff --git a/.github/workflows/triage-incoming.yml b/.github/workflows/triage-incoming.yml index 0f0397cf5b..24dac47bf2 100644 --- a/.github/workflows/triage-incoming.yml +++ b/.github/workflows/triage-incoming.yml @@ -6,7 +6,7 @@ on: jobs: triage: - uses: matrix-org/backend-meta/.github/workflows/triage-incoming.yml@v1 + uses: matrix-org/backend-meta/.github/workflows/triage-incoming.yml@v2 with: project_id: 'PVT_kwDOAIB0Bs4AFDdZ' content_id: ${{ github.event.issue.node_id }} diff --git a/CHANGES.md b/CHANGES.md index f5c19bcb97..644ef6e036 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.78.0 (2023-02-28) +=========================== + +Bugfixes +-------- + +- Fix a bug introduced in Synapse 1.76 where 5s delays would occasionally occur in deployments using workers. ([\#15150](https://github.com/matrix-org/synapse/issues/15150)) + + Synapse 1.78.0rc1 (2023-02-21) ============================== diff --git a/changelog.d/14026.doc b/changelog.d/14026.doc new file mode 100644 index 0000000000..28fc5568ea --- /dev/null +++ b/changelog.d/14026.doc @@ -0,0 +1 @@ +Document how to use caches in a module. diff --git a/changelog.d/14101.misc b/changelog.d/14101.misc new file mode 100644 index 0000000000..c48f40cd38 --- /dev/null +++ b/changelog.d/14101.misc @@ -0,0 +1 @@ +Run the integration test suites with the asyncio reactor enabled in CI. diff --git a/changelog.d/14869.bugfix b/changelog.d/14869.bugfix new file mode 100644 index 0000000000..865b597741 --- /dev/null +++ b/changelog.d/14869.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.75.0rc1 that caused experimental support for deleting account data to raise an internal server error while using an account data writer worker. \ No newline at end of file diff --git a/changelog.d/14918.misc b/changelog.d/14918.misc new file mode 100644 index 0000000000..828794354a --- /dev/null +++ b/changelog.d/14918.misc @@ -0,0 +1 @@ +Batch up storing state groups when creating a new room. \ No newline at end of file diff --git a/changelog.d/15044.feature b/changelog.d/15044.feature new file mode 100644 index 0000000000..91e5cda8c3 --- /dev/null +++ b/changelog.d/15044.feature @@ -0,0 +1 @@ +Add two new Third Party Rules module API callbacks: [`on_add_user_third_party_identifier`](https://matrix-org.github.io/synapse/v1.79/modules/third_party_rules_callbacks.html#on_add_user_third_party_identifier) and [`on_remove_user_third_party_identifier`](https://matrix-org.github.io/synapse/v1.79/modules/third_party_rules_callbacks.html#on_remove_user_third_party_identifier). \ No newline at end of file diff --git a/changelog.d/15051.misc b/changelog.d/15051.misc new file mode 100644 index 0000000000..fabfe77d35 --- /dev/null +++ b/changelog.d/15051.misc @@ -0,0 +1 @@ +Update [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952) support based on changes to the MSC. diff --git a/changelog.d/15077.feature b/changelog.d/15077.feature new file mode 100644 index 0000000000..384e751056 --- /dev/null +++ b/changelog.d/15077.feature @@ -0,0 +1 @@ +Experimental support for MSC3967 to not require UIA for setting up cross-signing on first use. diff --git a/changelog.d/15088.bugfix b/changelog.d/15088.bugfix new file mode 100644 index 0000000000..15d5286f80 --- /dev/null +++ b/changelog.d/15088.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where Synapse handled an unspecced field on push rules. diff --git a/changelog.d/15116.feature b/changelog.d/15116.feature new file mode 100644 index 0000000000..087d8dc7f1 --- /dev/null +++ b/changelog.d/15116.feature @@ -0,0 +1 @@ +Add an [admin API](https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/index.html) to delete a [specific event report](https://spec.matrix.org/v1.6/client-server-api/#reporting-content). \ No newline at end of file diff --git a/changelog.d/15133.feature b/changelog.d/15133.feature new file mode 100644 index 0000000000..e0af0d4554 --- /dev/null +++ b/changelog.d/15133.feature @@ -0,0 +1 @@ +Add support for knocking to workers. \ No newline at end of file diff --git a/changelog.d/15134.feature b/changelog.d/15134.feature new file mode 100644 index 0000000000..0dbb30bc8f --- /dev/null +++ b/changelog.d/15134.feature @@ -0,0 +1 @@ +Allow use of the `/filter` Client-Server APIs on workers. \ No newline at end of file diff --git a/changelog.d/15143.misc b/changelog.d/15143.misc new file mode 100644 index 0000000000..cff4518811 --- /dev/null +++ b/changelog.d/15143.misc @@ -0,0 +1 @@ +Fix a long-standing bug where the user directory search was not case-insensitive for accented characters. diff --git a/changelog.d/15146.misc b/changelog.d/15146.misc new file mode 100644 index 0000000000..8de5f95239 --- /dev/null +++ b/changelog.d/15146.misc @@ -0,0 +1 @@ +Refactor the media modules. diff --git a/changelog.d/15148.doc b/changelog.d/15148.doc new file mode 100644 index 0000000000..4e9e163306 --- /dev/null +++ b/changelog.d/15148.doc @@ -0,0 +1 @@ +Correct small documentation errors in some `MatrixFederationHttpClient` methods. \ No newline at end of file diff --git a/changelog.d/15152.misc b/changelog.d/15152.misc new file mode 100644 index 0000000000..6b2c73d0ab --- /dev/null +++ b/changelog.d/15152.misc @@ -0,0 +1 @@ +Bump dawidd6/action-download-artifact from 2.25.0 to 2.26.0. diff --git a/changelog.d/15154.misc b/changelog.d/15154.misc new file mode 100644 index 0000000000..c958b52078 --- /dev/null +++ b/changelog.d/15154.misc @@ -0,0 +1 @@ +Bump docker/login-action from 1 to 2. diff --git a/changelog.d/15155.misc b/changelog.d/15155.misc new file mode 100644 index 0000000000..40c73e96ec --- /dev/null +++ b/changelog.d/15155.misc @@ -0,0 +1 @@ +Bump actions/checkout from 2 to 3. diff --git a/changelog.d/15156.misc b/changelog.d/15156.misc new file mode 100644 index 0000000000..ebae4cb456 --- /dev/null +++ b/changelog.d/15156.misc @@ -0,0 +1 @@ +Bump matrix-org/backend-meta from 1 to 2. diff --git a/changelog.d/15157.misc b/changelog.d/15157.misc new file mode 100644 index 0000000000..730b706dfe --- /dev/null +++ b/changelog.d/15157.misc @@ -0,0 +1 @@ +Bump typing-extensions from 4.4.0 to 4.5.0. diff --git a/changelog.d/15158.misc b/changelog.d/15158.misc new file mode 100644 index 0000000000..fc0eecfd21 --- /dev/null +++ b/changelog.d/15158.misc @@ -0,0 +1 @@ +Bump types-opentracing from 2.4.10.1 to 2.4.10.3. diff --git a/changelog.d/15159.misc b/changelog.d/15159.misc new file mode 100644 index 0000000000..ebb857a89e --- /dev/null +++ b/changelog.d/15159.misc @@ -0,0 +1 @@ +Bump ruff from 0.0.237 to 0.0.252. diff --git a/changelog.d/15160.misc b/changelog.d/15160.misc new file mode 100644 index 0000000000..13b098d17c --- /dev/null +++ b/changelog.d/15160.misc @@ -0,0 +1 @@ +Bump types-setuptools from 67.3.0.1 to 67.4.0.3. diff --git a/changelog.d/15163.bugfix b/changelog.d/15163.bugfix new file mode 100644 index 0000000000..7ff1cd4463 --- /dev/null +++ b/changelog.d/15163.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where an initial sync would not respond to changes to the list of ignored users if there was an initial sync cached. \ No newline at end of file diff --git a/changelog.d/15164.misc b/changelog.d/15164.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/15164.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/changelog.d/15165.misc b/changelog.d/15165.misc new file mode 100644 index 0000000000..a75be84dac --- /dev/null +++ b/changelog.d/15165.misc @@ -0,0 +1 @@ +Move `get_event_report` and `get_event_reports_paginate` from `RoomStore` to `RoomWorkerStore`. \ No newline at end of file diff --git a/changelog.d/15167.misc b/changelog.d/15167.misc new file mode 100644 index 0000000000..175c2a3b83 --- /dev/null +++ b/changelog.d/15167.misc @@ -0,0 +1 @@ +Remove dangling reference to being a reference implementation in docstring. diff --git a/changelog.d/15168.doc b/changelog.d/15168.doc new file mode 100644 index 0000000000..dbd3c54714 --- /dev/null +++ b/changelog.d/15168.doc @@ -0,0 +1 @@ +Correct the description of the behavior of `registration_shared_secret_path` on startup. diff --git a/changelog.d/15172.feature b/changelog.d/15172.feature new file mode 100644 index 0000000000..3f789edb7f --- /dev/null +++ b/changelog.d/15172.feature @@ -0,0 +1 @@ +Remove support for server-side aggregation of reactions. diff --git a/changelog.d/15175.misc b/changelog.d/15175.misc new file mode 100644 index 0000000000..8de5f95239 --- /dev/null +++ b/changelog.d/15175.misc @@ -0,0 +1 @@ +Refactor the media modules. diff --git a/changelog.d/15180.bugfix b/changelog.d/15180.bugfix new file mode 100644 index 0000000000..e7a3dcd41a --- /dev/null +++ b/changelog.d/15180.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.78.0 where requests to claim dehydrated devices would fail with a `405` error. diff --git a/changelog.d/15186.docker b/changelog.d/15186.docker new file mode 100644 index 0000000000..5e436ff7e2 --- /dev/null +++ b/changelog.d/15186.docker @@ -0,0 +1 @@ +Improve startup logging in the with-workers Docker image. diff --git a/changelog.d/15189.misc b/changelog.d/15189.misc new file mode 100644 index 0000000000..ded2feb79e --- /dev/null +++ b/changelog.d/15189.misc @@ -0,0 +1 @@ +Remove the unspecced `PUT` on the `/knock/{roomIdOrAlias}` endpoint. diff --git a/changelog.d/15191.misc b/changelog.d/15191.misc new file mode 100644 index 0000000000..579f76d451 --- /dev/null +++ b/changelog.d/15191.misc @@ -0,0 +1 @@ +Add a `get_next_txn` method to `StreamIdGenerator` to match `MultiWriterIdGenerator`. \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index f9e95ee5e2..0f094308c1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.78.0) stable; urgency=medium + + * New Synapse release 1.78.0. + + -- Synapse Packaging team <packages@matrix.org> Tue, 28 Feb 2023 08:56:03 -0800 + matrix-synapse-py3 (1.78.0~rc1) stable; urgency=medium * Add `matrix-org-archive-keyring` package as recommended. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 006e5fd8da..ad2b796641 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -168,6 +168,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases", "^/_matrix/client/v1/rooms/.*/timestamp_to_event$", "^/_matrix/client/(api/v1|r0|v3|unstable)/search", + "^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)", ], "shared_extra_conf": {}, "worker_extra_conf": "", @@ -230,6 +231,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/send", "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$", "^/_matrix/client/(api/v1|r0|v3|unstable)/join/", + "^/_matrix/client/(api/v1|r0|v3|unstable)/knock/", "^/_matrix/client/(api/v1|r0|v3|unstable)/profile/", "^/_matrix/client/(v1|unstable/org.matrix.msc2716)/rooms/.*/batch_send", ], @@ -1051,7 +1053,8 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None: if not os.path.exists(config_path): log("Generating base homeserver config") generate_base_homeserver_config() - + else: + log("Base homeserver config exists—not regenerating") # This script may be run multiple times (mostly by Complement, see note at top of # file). Don't re-configure workers in this instance. mark_filepath = "/conf/workers_have_been_configured" @@ -1070,11 +1073,14 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None: requested_worker_types = process_worker_types(worker_types) # Always regenerate all other config files + log("Generating worker config files") generate_worker_files(environ, config_path, data_dir, requested_worker_types) # Mark workers as being configured with open(mark_filepath, "w") as f: f.write("") + else: + log("Worker config exists—not regenerating") # Lifted right out of start.py jemallocpath = "/usr/lib/%s-linux-gnu/libjemalloc.so.2" % (platform.machine(),) diff --git a/docs/admin_api/event_reports.md b/docs/admin_api/event_reports.md index beec8bb7ef..83f7dc37f4 100644 --- a/docs/admin_api/event_reports.md +++ b/docs/admin_api/event_reports.md @@ -169,3 +169,17 @@ The following fields are returned in the JSON response body: * `canonical_alias`: string - The canonical alias of the room. `null` if the room does not have a canonical alias set. * `event_json`: object - Details of the original event that was reported. + +# Delete a specific event report + +This API deletes a specific event report. If the request is successful, the response body +will be an empty JSON object. + +The api is: +``` +DELETE /_synapse/admin/v1/event_reports/<report_id> +``` + +**URL parameters:** + +* `report_id`: string - The ID of the event report. diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 50969edd46..1a0c6ec954 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -307,8 +307,8 @@ _Changed in Synapse v1.62.0: `synapse.module_api.NOT_SPAM` and `synapse.module_a ```python async def check_media_file_for_spam( - file_wrapper: "synapse.rest.media.v1.media_storage.ReadableFileWrapper", - file_info: "synapse.rest.media.v1._base.FileInfo", + file_wrapper: "synapse.media.media_storage.ReadableFileWrapper", + file_info: "synapse.media._base.FileInfo", ) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` diff --git a/docs/modules/third_party_rules_callbacks.md b/docs/modules/third_party_rules_callbacks.md index 888e43bd10..4a27d976fb 100644 --- a/docs/modules/third_party_rules_callbacks.md +++ b/docs/modules/third_party_rules_callbacks.md @@ -254,6 +254,11 @@ If multiple modules implement this callback, Synapse runs them all in order. _First introduced in Synapse v1.56.0_ +**<span style="color:red"> +This callback is deprecated in favour of the `on_add_user_third_party_identifier` callback, which +features the same functionality. The only difference is in name. +</span>** + ```python async def on_threepid_bind(user_id: str, medium: str, address: str) -> None: ``` @@ -268,6 +273,44 @@ server_. If multiple modules implement this callback, Synapse runs them all in order. +### `on_add_user_third_party_identifier` + +_First introduced in Synapse v1.79.0_ + +```python +async def on_add_user_third_party_identifier(user_id: str, medium: str, address: str) -> None: +``` + +Called after successfully creating an association between a user and a third-party identifier +(email address, phone number). The module is given the Matrix ID of the user the +association is for, as well as the medium (`email` or `msisdn`) and address of the +third-party identifier (i.e. an email address). + +Note that this callback is _not_ called if a user attempts to bind their third-party identifier +to an identity server (via a call to [`POST +/_matrix/client/v3/account/3pid/bind`](https://spec.matrix.org/v1.5/client-server-api/#post_matrixclientv3account3pidbind)). + +If multiple modules implement this callback, Synapse runs them all in order. + +### `on_remove_user_third_party_identifier` + +_First introduced in Synapse v1.79.0_ + +```python +async def on_remove_user_third_party_identifier(user_id: str, medium: str, address: str) -> None: +``` + +Called after successfully removing an association between a user and a third-party identifier +(email address, phone number). The module is given the Matrix ID of the user the +association is for, as well as the medium (`email` or `msisdn`) and address of the +third-party identifier (i.e. an email address). + +Note that this callback is _not_ called if a user attempts to unbind their third-party +identifier from an identity server (via a call to [`POST +/_matrix/client/v3/account/3pid/unbind`](https://spec.matrix.org/v1.5/client-server-api/#post_matrixclientv3account3pidunbind)). + +If multiple modules implement this callback, Synapse runs them all in order. + ## Example The example below is a module that implements the third-party rules callback @@ -300,4 +343,4 @@ class EventCensorer: ) event_dict["content"] = new_event_content return event_dict -``` +``` \ No newline at end of file diff --git a/docs/modules/writing_a_module.md b/docs/modules/writing_a_module.md index 30de69a533..b99f64b9d8 100644 --- a/docs/modules/writing_a_module.md +++ b/docs/modules/writing_a_module.md @@ -83,3 +83,59 @@ the callback name as the argument name and the function as its value. A Callbacks for each category can be found on their respective page of the [Synapse documentation website](https://matrix-org.github.io/synapse). + +## Caching + +_Added in Synapse 1.74.0._ + +Modules can leverage Synapse's caching tools to manage their own cached functions. This +can be helpful for modules that need to repeatedly request the same data from the database +or a remote service. + +Functions that need to be wrapped with a cache need to be decorated with a `@cached()` +decorator (which can be imported from `synapse.module_api`) and registered with the +[`ModuleApi.register_cached_function`](https://github.com/matrix-org/synapse/blob/release-v1.77/synapse/module_api/__init__.py#L888) +API when initialising the module. If the module needs to invalidate an entry in a cache, +it needs to use the [`ModuleApi.invalidate_cache`](https://github.com/matrix-org/synapse/blob/release-v1.77/synapse/module_api/__init__.py#L904) +API, with the function to invalidate the cache of and the key(s) of the entry to +invalidate. + +Below is an example of a simple module using a cached function: + +```python +from typing import Any +from synapse.module_api import cached, ModuleApi + +class MyModule: + def __init__(self, config: Any, api: ModuleApi): + self.api = api + + # Register the cached function so Synapse knows how to correctly invalidate + # entries for it. + self.api.register_cached_function(self.get_user_from_id) + + @cached() + async def get_department_for_user(self, user_id: str) -> str: + """A function with a cache.""" + # Request a department from an external service. + return await self.http_client.get_json( + "https://int.example.com/users", {"user_id": user_id) + )["department"] + + async def do_something_with_users(self) -> None: + """Calls the cached function and then invalidates an entry in its cache.""" + + user_id = "@alice:example.com" + + # Get the user. Since get_department_for_user is wrapped with a cache, + # the return value for this user_id will be cached. + department = await self.get_department_for_user(user_id) + + # Do something with `department`... + + # Let's say something has changed with our user, and the entry we have for + # them in the cache is out of date, so we want to invalidate it. + await self.api.invalidate_cache(self.get_department_for_user, (user_id,)) +``` + +See the [`cached` docstring](https://github.com/matrix-org/synapse/blob/release-v1.77/synapse/module_api/__init__.py#L190) for more details. diff --git a/docs/upgrade.md b/docs/upgrade.md index 15167b8c58..f06e874054 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,30 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.79.0 + +## The `on_threepid_bind` module callback method has been deprecated + +Synapse v1.79.0 deprecates the +[`on_threepid_bind`](modules/third_party_rules_callbacks.md#on_threepid_bind) +"third-party rules" Synapse module callback method in favour of a new module method, +[`on_add_user_third_party_identifier`](modules/third_party_rules_callbacks.md#on_add_user_third_party_identifier). +`on_threepid_bind` will be removed in a future version of Synapse. You should check whether any Synapse +modules in use in your deployment are making use of `on_threepid_bind`, and update them where possible. + +The arguments and functionality of the new method are the same. + +The justification behind the name change is that the old method's name, `on_threepid_bind`, was +misleading. A user is considered to "bind" their third-party ID to their Matrix ID only if they +do so via an [identity server](https://spec.matrix.org/latest/identity-service-api/) +(so that users on other homeservers may find them). But this method was not called in that case - +it was only called when a user added a third-party identifier on the local homeserver. + +Module developers may also be interested in the related +[`on_remove_user_third_party_identifier`](modules/third_party_rules_callbacks.md#on_remove_user_third_party_identifier) +module callback method that was also added in Synapse v1.79.0. This new method is called when a +user removes a third-party identifier from their account. + # Upgrading to v1.78.0 ## Deprecate the `/_synapse/admin/v1/media/<server_name>/delete` admin API diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 4139961810..015855ee7e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2227,8 +2227,8 @@ allows the shared secret to be specified in an external file. The file should be a plain text file, containing only the shared secret. -If this file does not exist, Synapse will create a new signing -key on startup and store it in this file. +If this file does not exist, Synapse will create a new shared +secret on startup and store it in this file. Example configuration: ```yaml diff --git a/docs/workers.md b/docs/workers.md index 2eb970ffa6..fa536cd310 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -232,6 +232,7 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms$ ^/_matrix/client/v1/rooms/.*/timestamp_to_event$ ^/_matrix/client/(api/v1|r0|v3|unstable)/search$ + ^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$) # Encryption requests ^/_matrix/client/(r0|v3|unstable)/keys/query$ @@ -251,6 +252,7 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state/ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ ^/_matrix/client/(api/v1|r0|v3|unstable)/join/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/knock/ ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/ # Account data requests diff --git a/mypy.ini b/mypy.ini index 94562d0bce..572734f8e7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -36,9 +36,6 @@ exclude = (?x) [mypy-synapse.federation.transport.client] disallow_untyped_defs = False -[mypy-synapse.http.client] -disallow_untyped_defs = False - [mypy-synapse.http.matrixfederationclient] disallow_untyped_defs = False diff --git a/poetry.lock b/poetry.lock index 8ffdab7a22..cd3dc6fdcd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1985,28 +1985,29 @@ jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] [[package]] name = "ruff" -version = "0.0.237" +version = "0.0.252" description = "An extremely fast Python linter, written in Rust." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.0.237-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:2ea04d826ffca58a7ae926115a801960c757d53c9027f2ca9acbe84c9f2b2f04"}, - {file = "ruff-0.0.237-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:8ed113937fab9f73f8c1a6c0350bb4fe03e951370139c6e0adb81f48a8dcf4c6"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9bcb71a3efb5fe886eb48d739cfae5df4a15617e7b5a7668aa45ebf74c0d3fa"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:80ce10718abbf502818c0d650ebab99fdcef5e937a1ded3884493ddff804373c"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cc6cb7c1efcc260df5a939435649610a28f9f438b8b313384c8985ac6574f9f"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7eef0c7a1e45a4e30328ae101613575944cbf47a3a11494bf9827722da6c66b3"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d122433a21ce4a21fbba34b73fc3add0ccddd1643b3ff5abb8d2767952f872e"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b76311335adda4de3c1d471e64e89a49abfeebf02647e3db064e7740e7f36ed6"}, - {file = "ruff-0.0.237-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46c5977b643aaf2b6f84641265f835b6c7f67fcca38dbae08c4f15602e084ca0"}, - {file = "ruff-0.0.237-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3d6ed86d0d4d742360a262d52191581f12b669a68e59ae3b52e80d7483b3d7b3"}, - {file = "ruff-0.0.237-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fedfb60f986c26cdb1809db02866e68508db99910c587d2c4066a5c07aa85593"}, - {file = "ruff-0.0.237-py3-none-musllinux_1_2_i686.whl", hash = "sha256:bb96796be5919871fa9ae7e88968ba9e14306d9a3f217ca6c204f68a5abeccdd"}, - {file = "ruff-0.0.237-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea239cfedf67b74ea4952e1074bb99a4281c2145441d70bc7e2f058d5c49f1c9"}, - {file = "ruff-0.0.237-py3-none-win32.whl", hash = "sha256:8d6a1d21ae15da2b1dcffeee2606e90de0e6717e72957da7d16ab6ae18dd0058"}, - {file = "ruff-0.0.237-py3-none-win_amd64.whl", hash = "sha256:525e5ec81cee29b993f77976026a6bf44528a14aa6edb1ef47bd8079147395ae"}, - {file = "ruff-0.0.237.tar.gz", hash = "sha256:630c575f543733adf6c19a11d9a02ca9ecc364bd7140af8a4c854d4728be6b56"}, + {file = "ruff-0.0.252-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:349367a227c4db7abbc3a9993efea8a608b5bea4bb4a1e5fc6f0d56819524f92"}, + {file = "ruff-0.0.252-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:ce77f9106d96b4faf7865860fb5155b9deaf6f699d9c279118c5ad947739ecaf"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edadb0b050293b4e60dab979ba6a4e734d9c899cbe316a0ee5b65e3cdd39c750"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4efdae98937d1e4d23ab0b7fc7e8e6b6836cc7d2d42238ceeacbc793ef780542"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8546d879f7d3f669379a03e7b103d90e11901976ab508aeda59c03dfd8a359e"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:83fdc7169b6c1fb5fe8d1cdf345697f558c1b433ef97df9ca11defa2a8f3ee9e"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84ed9be1a17e2a556a571a5b959398633dd10910abd8dcf8b098061e746e892d"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f5e77bd9ba4438cf2ee32154e2673afe22f538ef29f5d65ca47e3dc46c42cf8"}, + {file = "ruff-0.0.252-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5179b94b45c0f8512eaff3ab304c14714a46df2e9ca72a9d96084adc376b71"}, + {file = "ruff-0.0.252-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:92efd8a71157595df5bc46aaaa0613d8a2fbc5cddc53ae7b749c16025c324732"}, + {file = "ruff-0.0.252-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd350fc10832cfd28e681d829a8aa83ea3e653326e0ea9d98637dfb8d46177d2"}, + {file = "ruff-0.0.252-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f119240c9631216e846166e06023b1d878e25fbac93bf20da50069e91cfbfaee"}, + {file = "ruff-0.0.252-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5c5a49f89f5ede93d16eddfeeadd7e5739ec703e8f63ac95eac30236b9e49da3"}, + {file = "ruff-0.0.252-py3-none-win32.whl", hash = "sha256:89a897dc743f2fe063483ea666097e72e848f4bbe40493fe0533e61799959f6e"}, + {file = "ruff-0.0.252-py3-none-win_amd64.whl", hash = "sha256:cdc89ad6ff88519b1fb1816ac82a9ad910762c90ff5fd64dda7691b72d36aff7"}, + {file = "ruff-0.0.252-py3-none-win_arm64.whl", hash = "sha256:4b594a17cf53077165429486650658a0e1b2ac6ab88954f5afd50d2b1b5657a9"}, + {file = "ruff-0.0.252.tar.gz", hash = "sha256:6992611ab7bdbe7204e4831c95ddd3febfeece2e6f5e44bbed044454c7db0f63"}, ] [[package]] @@ -2601,18 +2602,6 @@ types-enum34 = "*" types-ipaddress = "*" [[package]] -name = "types-docutils" -version = "0.19.1.1" -description = "Typing stubs for docutils" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "types-docutils-0.19.1.1.tar.gz", hash = "sha256:be0a51ba1c7dd215d9d2df66d6845e63c1009b4bbf4c5beb87a0d9745cdba962"}, - {file = "types_docutils-0.19.1.1-py3-none-any.whl", hash = "sha256:a024cada35f0c13cc45eb0b68a102719018a634013690b7fef723bcbfadbd1f1"}, -] - -[[package]] name = "types-enum34" version = "1.1.8" description = "Typing stubs for enum34" @@ -2650,14 +2639,14 @@ files = [ [[package]] name = "types-opentracing" -version = "2.4.10.1" +version = "2.4.10.3" description = "Typing stubs for opentracing" category = "dev" optional = false python-versions = "*" files = [ - {file = "types-opentracing-2.4.10.1.tar.gz", hash = "sha256:49e7e52b8b6e221865a9201fc8c2df0bcda8e7098d4ebb35903dbfa4b4d29195"}, - {file = "types_opentracing-2.4.10.1-py3-none-any.whl", hash = "sha256:eb63394acd793e7d9e327956242349fee14580a87c025408dc268d4dd883cc24"}, + {file = "types-opentracing-2.4.10.3.tar.gz", hash = "sha256:b277f114265b41216714f9c77dffcab57038f1730fd141e2c55c5c9f6f2caa87"}, + {file = "types_opentracing-2.4.10.3-py3-none-any.whl", hash = "sha256:60244d718fcd9de7043645ecaf597222d550432507098ab2e6268f7b589a7fa7"}, ] [[package]] @@ -2728,19 +2717,16 @@ types-urllib3 = "<1.27" [[package]] name = "types-setuptools" -version = "67.3.0.1" +version = "67.4.0.3" description = "Typing stubs for setuptools" category = "dev" optional = false python-versions = "*" files = [ - {file = "types-setuptools-67.3.0.1.tar.gz", hash = "sha256:1a26d373036c720e566823b6edd664a2db4d138b6eeba856721ec1254203474f"}, - {file = "types_setuptools-67.3.0.1-py3-none-any.whl", hash = "sha256:a7e0f0816b5b449f5bcdc0efa43da91ff81dbe6941f293a6490d68a450e130a1"}, + {file = "types-setuptools-67.4.0.3.tar.gz", hash = "sha256:19e958dfdbf1c5a628e54c2a7ee84935051afb7278d0c1cdb08ac194757ee3b1"}, + {file = "types_setuptools-67.4.0.3-py3-none-any.whl", hash = "sha256:3c83c3a6363dd3ddcdd054796705605f0fa8b8e5a39390e07a05e5f7af054978"}, ] -[package.dependencies] -types-docutils = "*" - [[package]] name = "types-urllib3" version = "1.26.10" @@ -2755,14 +2741,14 @@ files = [ [[package]] name = "typing-extensions" -version = "4.4.0" +version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, - {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, ] [[package]] @@ -3044,4 +3030,4 @@ user-search = ["pyicu"] [metadata] lock-version = "2.0" python-versions = "^3.7.1" -content-hash = "e12077711e5ff83f3c6038ea44c37bd49773799ec8245035b01094b7800c5c92" +content-hash = "7bcffef7b6e6d4b1113222e2ca152b3798c997872789c8a1ea01238f199d56fe" diff --git a/pyproject.toml b/pyproject.toml index cef7d295c1..27785b6e13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ manifest-path = "rust/Cargo.toml" [tool.poetry] name = "matrix-synapse" -version = "1.78.0rc1" +version = "1.78.0" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors <packages@matrix.org>"] license = "Apache-2.0" @@ -313,7 +313,7 @@ all = [ # We pin black so that our tests don't start failing on new releases. isort = ">=5.10.1" black = ">=22.3.0" -ruff = "0.0.237" +ruff = "0.0.252" # Typechecking mypy = "*" diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index efd19a2165..7c987d4948 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -44,7 +44,6 @@ fn bench_match_exact(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, false, - BTreeSet::new(), 10, Some(0), Default::default(), @@ -60,8 +59,7 @@ fn bench_match_exact(b: &mut Bencher) { let condition = Condition::Known(synapse::push::KnownCondition::EventMatch( EventMatchCondition { key: "room_id".into(), - pattern: Some("!room:server".into()), - pattern_type: None, + pattern: "!room:server".into(), }, )); @@ -93,7 +91,6 @@ fn bench_match_word(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, false, - BTreeSet::new(), 10, Some(0), Default::default(), @@ -109,8 +106,7 @@ fn bench_match_word(b: &mut Bencher) { let condition = Condition::Known(synapse::push::KnownCondition::EventMatch( EventMatchCondition { key: "content.body".into(), - pattern: Some("test".into()), - pattern_type: None, + pattern: "test".into(), }, )); @@ -142,7 +138,6 @@ fn bench_match_word_miss(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, false, - BTreeSet::new(), 10, Some(0), Default::default(), @@ -158,8 +153,7 @@ fn bench_match_word_miss(b: &mut Bencher) { let condition = Condition::Known(synapse::push::KnownCondition::EventMatch( EventMatchCondition { key: "content.body".into(), - pattern: Some("foobar".into()), - pattern_type: None, + pattern: "foobar".into(), }, )); @@ -191,7 +185,6 @@ fn bench_eval_message(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, false, - BTreeSet::new(), 10, Some(0), Default::default(), diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index 4a62b9696f..3d72a4a4c3 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -21,13 +21,13 @@ use lazy_static::lazy_static; use serde_json::Value; use super::KnownCondition; -use crate::push::Condition; -use crate::push::EventMatchCondition; -use crate::push::PushRule; -use crate::push::RelatedEventMatchCondition; +use crate::push::RelatedEventMatchTypeCondition; use crate::push::SetTweak; use crate::push::TweakValue; use crate::push::{Action, ExactEventMatchCondition, SimpleJsonValue}; +use crate::push::{Condition, EventMatchTypeCondition}; +use crate::push::{EventMatchCondition, EventMatchPatternType}; +use crate::push::{ExactEventMatchTypeCondition, PushRule}; const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak { set_tweak: Cow::Borrowed("highlight"), @@ -72,8 +72,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("content.m.relates_to.rel_type"), - pattern: Some(Cow::Borrowed("m.replace")), - pattern_type: None, + pattern: Cow::Borrowed("m.replace"), }, ))]), actions: Cow::Borrowed(&[]), @@ -86,8 +85,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("content.msgtype"), - pattern: Some(Cow::Borrowed("m.notice")), - pattern_type: None, + pattern: Cow::Borrowed("m.notice"), }, ))]), actions: Cow::Borrowed(&[Action::DontNotify]), @@ -100,18 +98,15 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.member")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.member"), })), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("content.membership"), - pattern: Some(Cow::Borrowed("invite")), - pattern_type: None, + pattern: Cow::Borrowed("invite"), })), - Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + Condition::Known(KnownCondition::EventMatchType(EventMatchTypeCondition { key: Cow::Borrowed("state_key"), - pattern: None, - pattern_type: Some(Cow::Borrowed("user_id")), + pattern_type: Cow::Borrowed(&EventMatchPatternType::UserId), })), ]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION, SOUND_ACTION]), @@ -124,8 +119,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.member")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.member"), }, ))]), actions: Cow::Borrowed(&[Action::DontNotify]), @@ -135,11 +129,10 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ PushRule { rule_id: Cow::Borrowed("global/override/.im.nheko.msc3664.reply"), priority_class: 5, - conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelatedEventMatch( - RelatedEventMatchCondition { - key: Some(Cow::Borrowed("sender")), - pattern: None, - pattern_type: Some(Cow::Borrowed("user_id")), + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelatedEventMatchType( + RelatedEventMatchTypeCondition { + key: Cow::Borrowed("sender"), + pattern_type: Cow::Borrowed(&EventMatchPatternType::UserId), rel_type: Cow::Borrowed("m.in_reply_to"), include_fallbacks: None, }, @@ -151,7 +144,12 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ PushRule { rule_id: Cow::Borrowed(".org.matrix.msc3952.is_user_mention"), priority_class: 5, - conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::IsUserMention)]), + conditions: Cow::Borrowed(&[Condition::Known( + KnownCondition::ExactEventPropertyContainsType(ExactEventMatchTypeCondition { + key: Cow::Borrowed("content.org.matrix.msc3952.mentions.user_ids"), + value_type: Cow::Borrowed(&EventMatchPatternType::UserId), + }), + )]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), default: true, default_enabled: true, @@ -189,8 +187,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ }), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("content.body"), - pattern: Some(Cow::Borrowed("@room")), - pattern_type: None, + pattern: Cow::Borrowed("@room"), })), ]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]), @@ -203,13 +200,11 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.tombstone")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.tombstone"), })), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("state_key"), - pattern: Some(Cow::Borrowed("")), - pattern_type: None, + pattern: Cow::Borrowed(""), })), ]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]), @@ -222,8 +217,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.reaction")), - pattern_type: None, + pattern: Cow::Borrowed("m.reaction"), }, ))]), actions: Cow::Borrowed(&[]), @@ -236,13 +230,11 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.server_acl")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.server_acl"), })), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("state_key"), - pattern: Some(Cow::Borrowed("")), - pattern_type: None, + pattern: Cow::Borrowed(""), })), ]), actions: Cow::Borrowed(&[]), @@ -255,8 +247,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.response")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc3381.poll.response"), }, ))]), actions: Cow::Borrowed(&[]), @@ -268,11 +259,10 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ pub const BASE_APPEND_CONTENT_RULES: &[PushRule] = &[PushRule { rule_id: Cow::Borrowed("global/content/.m.rule.contains_user_name"), priority_class: 4, - conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( - EventMatchCondition { + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatchType( + EventMatchTypeCondition { key: Cow::Borrowed("content.body"), - pattern: None, - pattern_type: Some(Cow::Borrowed("user_localpart")), + pattern_type: Cow::Borrowed(&EventMatchPatternType::UserLocalpart), }, ))]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), @@ -287,8 +277,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.call.invite")), - pattern_type: None, + pattern: Cow::Borrowed("m.call.invite"), }, ))]), actions: Cow::Borrowed(&[Action::Notify, RING_ACTION, HIGHLIGHT_FALSE_ACTION]), @@ -301,8 +290,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.message")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.message"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -318,8 +306,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.encrypted")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.encrypted"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -338,8 +325,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("org.matrix.msc1767.encrypted")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc1767.encrypted"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -363,8 +349,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("org.matrix.msc1767.message")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc1767.message"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -388,8 +373,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("org.matrix.msc1767.file")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc1767.file"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -413,8 +397,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("org.matrix.msc1767.image")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc1767.image"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -438,8 +421,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("org.matrix.msc1767.video")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc1767.video"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -463,8 +445,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("org.matrix.msc1767.audio")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc1767.audio"), })), Condition::Known(KnownCondition::RoomMemberCount { is: Some(Cow::Borrowed("2")), @@ -485,8 +466,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.message")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.message"), }, ))]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), @@ -499,8 +479,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("m.room.encrypted")), - pattern_type: None, + pattern: Cow::Borrowed("m.room.encrypted"), }, ))]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), @@ -514,8 +493,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("m.encrypted")), - pattern_type: None, + pattern: Cow::Borrowed("m.encrypted"), })), // MSC3933: Add condition on top of template rule - see MSC. Condition::Known(KnownCondition::RoomVersionSupports { @@ -534,8 +512,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("m.message")), - pattern_type: None, + pattern: Cow::Borrowed("m.message"), })), // MSC3933: Add condition on top of template rule - see MSC. Condition::Known(KnownCondition::RoomVersionSupports { @@ -554,8 +531,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("m.file")), - pattern_type: None, + pattern: Cow::Borrowed("m.file"), })), // MSC3933: Add condition on top of template rule - see MSC. Condition::Known(KnownCondition::RoomVersionSupports { @@ -574,8 +550,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("m.image")), - pattern_type: None, + pattern: Cow::Borrowed("m.image"), })), // MSC3933: Add condition on top of template rule - see MSC. Condition::Known(KnownCondition::RoomVersionSupports { @@ -594,8 +569,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("m.video")), - pattern_type: None, + pattern: Cow::Borrowed("m.video"), })), // MSC3933: Add condition on top of template rule - see MSC. Condition::Known(KnownCondition::RoomVersionSupports { @@ -614,8 +588,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), // MSC3933: Type changed from template rule - see MSC. - pattern: Some(Cow::Borrowed("m.audio")), - pattern_type: None, + pattern: Cow::Borrowed("m.audio"), })), // MSC3933: Add condition on top of template rule - see MSC. Condition::Known(KnownCondition::RoomVersionSupports { @@ -633,18 +606,15 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("im.vector.modular.widgets")), - pattern_type: None, + pattern: Cow::Borrowed("im.vector.modular.widgets"), })), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("content.type"), - pattern: Some(Cow::Borrowed("jitsi")), - pattern_type: None, + pattern: Cow::Borrowed("jitsi"), })), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("state_key"), - pattern: Some(Cow::Borrowed("*")), - pattern_type: None, + pattern: Cow::Borrowed("*"), })), ]), actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), @@ -660,8 +630,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ }), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.start")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc3381.poll.start"), })), ]), actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION]), @@ -674,8 +643,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.start")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc3381.poll.start"), }, ))]), actions: Cow::Borrowed(&[Action::Notify]), @@ -691,8 +659,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ }), Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.end")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc3381.poll.end"), })), ]), actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION]), @@ -705,8 +672,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( EventMatchCondition { key: Cow::Borrowed("type"), - pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.end")), - pattern_type: None, + pattern: Cow::Borrowed("org.matrix.msc3381.poll.end"), }, ))]), actions: Cow::Borrowed(&[Action::Notify]), diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index 55551ecb56..55846627cc 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{BTreeMap, BTreeSet}; +use std::borrow::Cow; +use std::collections::BTreeMap; -use crate::push::JsonValue; +use crate::push::{EventMatchPatternType, JsonValue}; use anyhow::{Context, Error}; use lazy_static::lazy_static; use log::warn; @@ -23,8 +24,8 @@ use regex::Regex; use super::{ utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType}, - Action, Condition, EventMatchCondition, ExactEventMatchCondition, FilteredPushRules, - KnownCondition, RelatedEventMatchCondition, SimpleJsonValue, + Action, Condition, ExactEventMatchCondition, FilteredPushRules, KnownCondition, + SimpleJsonValue, }; lazy_static! { @@ -71,8 +72,6 @@ pub struct PushRuleEvaluator { /// True if the event has a mentions property and MSC3952 support is enabled. has_mentions: bool, - /// The user mentions that were part of the message. - user_mentions: BTreeSet<String>, /// The number of users in the room. room_member_count: u64, @@ -113,7 +112,6 @@ impl PushRuleEvaluator { pub fn py_new( flattened_keys: BTreeMap<String, JsonValue>, has_mentions: bool, - user_mentions: BTreeSet<String>, room_member_count: u64, sender_power_level: Option<i64>, notification_power_levels: BTreeMap<String, i64>, @@ -133,7 +131,6 @@ impl PushRuleEvaluator { flattened_keys, body, has_mentions, - user_mentions, room_member_count, notification_power_levels, sender_power_level, @@ -256,24 +253,83 @@ impl PushRuleEvaluator { }; let result = match known_condition { - KnownCondition::EventMatch(event_match) => { - self.match_event_match(event_match, user_id)? + KnownCondition::EventMatch(event_match) => self.match_event_match( + &self.flattened_keys, + &event_match.key, + &event_match.pattern, + )?, + KnownCondition::EventMatchType(event_match) => { + // The `pattern_type` can either be "user_id" or "user_localpart", + // either way if we don't have a `user_id` then the condition can't + // match. + let user_id = if let Some(user_id) = user_id { + user_id + } else { + return Ok(false); + }; + + let pattern = match &*event_match.pattern_type { + EventMatchPatternType::UserId => user_id, + EventMatchPatternType::UserLocalpart => get_localpart_from_id(user_id)?, + }; + + self.match_event_match(&self.flattened_keys, &event_match.key, pattern)? } KnownCondition::ExactEventMatch(exact_event_match) => { self.match_exact_event_match(exact_event_match)? } - KnownCondition::RelatedEventMatch(event_match) => { - self.match_related_event_match(event_match, user_id)? - } - KnownCondition::ExactEventPropertyContains(exact_event_match) => { - self.match_exact_event_property_contains(exact_event_match)? + KnownCondition::RelatedEventMatch(event_match) => self.match_related_event_match( + &event_match.rel_type.clone(), + event_match.include_fallbacks, + event_match.key.clone(), + event_match.pattern.clone(), + )?, + KnownCondition::RelatedEventMatchType(event_match) => { + // The `pattern_type` can either be "user_id" or "user_localpart", + // either way if we don't have a `user_id` then the condition can't + // match. + let user_id = if let Some(user_id) = user_id { + user_id + } else { + return Ok(false); + }; + + let pattern = match &*event_match.pattern_type { + EventMatchPatternType::UserId => user_id, + EventMatchPatternType::UserLocalpart => get_localpart_from_id(user_id)?, + }; + + self.match_related_event_match( + &event_match.rel_type.clone(), + event_match.include_fallbacks, + Some(event_match.key.clone()), + Some(Cow::Borrowed(pattern)), + )? } - KnownCondition::IsUserMention => { - if let Some(uid) = user_id { - self.user_mentions.contains(uid) + KnownCondition::ExactEventPropertyContains(exact_event_match) => self + .match_exact_event_property_contains( + exact_event_match.key.clone(), + exact_event_match.value.clone(), + )?, + KnownCondition::ExactEventPropertyContainsType(exact_event_match) => { + // The `pattern_type` can either be "user_id" or "user_localpart", + // either way if we don't have a `user_id` then the condition can't + // match. + let user_id = if let Some(user_id) = user_id { + user_id } else { - false - } + return Ok(false); + }; + + let pattern = match &*exact_event_match.value_type { + EventMatchPatternType::UserId => user_id, + EventMatchPatternType::UserLocalpart => get_localpart_from_id(user_id)?, + }; + + self.match_exact_event_property_contains( + exact_event_match.key.clone(), + Cow::Borrowed(&SimpleJsonValue::Str(pattern.to_string())), + )? } KnownCondition::ContainsDisplayName => { if let Some(dn) = display_name { @@ -325,32 +381,12 @@ impl PushRuleEvaluator { /// Evaluates a `event_match` condition. fn match_event_match( &self, - event_match: &EventMatchCondition, - user_id: Option<&str>, + flattened_event: &BTreeMap<String, JsonValue>, + key: &str, + pattern: &str, ) -> Result<bool, Error> { - let pattern = if let Some(pattern) = &event_match.pattern { - pattern - } else if let Some(pattern_type) = &event_match.pattern_type { - // The `pattern_type` can either be "user_id" or "user_localpart", - // either way if we don't have a `user_id` then the condition can't - // match. - let user_id = if let Some(user_id) = user_id { - user_id - } else { - return Ok(false); - }; - - match &**pattern_type { - "user_id" => user_id, - "user_localpart" => get_localpart_from_id(user_id)?, - _ => return Ok(false), - } - } else { - return Ok(false); - }; - let haystack = if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) = - self.flattened_keys.get(&*event_match.key) + flattened_event.get(key) { haystack } else { @@ -359,7 +395,7 @@ impl PushRuleEvaluator { // For the content.body we match against "words", but for everything // else we match against the entire value. - let match_type = if event_match.key == "content.body" { + let match_type = if key == "content.body" { GlobMatchType::Word } else { GlobMatchType::Whole @@ -395,8 +431,10 @@ impl PushRuleEvaluator { /// Evaluates a `related_event_match` condition. (MSC3664) fn match_related_event_match( &self, - event_match: &RelatedEventMatchCondition, - user_id: Option<&str>, + rel_type: &str, + include_fallbacks: Option<bool>, + key: Option<Cow<str>>, + pattern: Option<Cow<str>>, ) -> Result<bool, Error> { // First check if related event matching is enabled... if !self.related_event_match_enabled { @@ -404,7 +442,7 @@ impl PushRuleEvaluator { } // get the related event, fail if there is none. - let event = if let Some(event) = self.related_events_flattened.get(&*event_match.rel_type) { + let event = if let Some(event) = self.related_events_flattened.get(rel_type) { event } else { return Ok(false); @@ -412,81 +450,38 @@ impl PushRuleEvaluator { // If we are not matching fallbacks, don't match if our special key indicating this is a // fallback relation is not present. - if !event_match.include_fallbacks.unwrap_or(false) - && event.contains_key("im.vector.is_falling_back") - { + if !include_fallbacks.unwrap_or(false) && event.contains_key("im.vector.is_falling_back") { return Ok(false); } - // if we have no key, accept the event as matching, if it existed without matching any - // fields. - let key = if let Some(key) = &event_match.key { - key - } else { - return Ok(true); - }; - - let pattern = if let Some(pattern) = &event_match.pattern { - pattern - } else if let Some(pattern_type) = &event_match.pattern_type { - // The `pattern_type` can either be "user_id" or "user_localpart", - // either way if we don't have a `user_id` then the condition can't - // match. - let user_id = if let Some(user_id) = user_id { - user_id - } else { - return Ok(false); - }; - - match &**pattern_type { - "user_id" => user_id, - "user_localpart" => get_localpart_from_id(user_id)?, - _ => return Ok(false), - } - } else { - return Ok(false); - }; - - let haystack = - if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) = event.get(&**key) { - haystack - } else { - return Ok(false); - }; - - // For the content.body we match against "words", but for everything - // else we match against the entire value. - let match_type = if key == "content.body" { - GlobMatchType::Word - } else { - GlobMatchType::Whole - }; - - let mut compiled_pattern = get_glob_matcher(pattern, match_type)?; - compiled_pattern.is_match(haystack) + match (key, pattern) { + // if we have no key, accept the event as matching. + (None, _) => Ok(true), + // There was a key, so we *must* have a pattern to go with it. + (Some(_), None) => Ok(false), + // If there is a key & pattern, check if they're in the flattened event (given by rel_type). + (Some(key), Some(pattern)) => self.match_event_match(event, &key, &pattern), + } } /// Evaluates a `exact_event_property_contains` condition. (MSC3758) fn match_exact_event_property_contains( &self, - exact_event_match: &ExactEventMatchCondition, + key: Cow<str>, + value: Cow<SimpleJsonValue>, ) -> Result<bool, Error> { // First check if the feature is enabled. if !self.msc3966_exact_event_property_contains { return Ok(false); } - let value = &exact_event_match.value; - - let haystack = if let Some(JsonValue::Array(haystack)) = - self.flattened_keys.get(&*exact_event_match.key) - { + let haystack = if let Some(JsonValue::Array(haystack)) = self.flattened_keys.get(&*key) { haystack } else { return Ok(false); }; - Ok(haystack.contains(&**value)) + Ok(haystack.contains(&value)) } /// Match the member count against an 'is' condition @@ -523,7 +518,6 @@ fn push_rule_evaluator() { let evaluator = PushRuleEvaluator::py_new( flattened_keys, false, - BTreeSet::new(), 10, Some(0), BTreeMap::new(), @@ -555,7 +549,6 @@ fn test_requires_room_version_supports_condition() { let evaluator = PushRuleEvaluator::py_new( flattened_keys, false, - BTreeSet::new(), 10, Some(0), BTreeMap::new(), diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index fdd2b2c143..6391d2ed47 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -328,14 +328,24 @@ pub enum Condition { #[serde(tag = "kind")] pub enum KnownCondition { EventMatch(EventMatchCondition), + // Identical to event_match but gives predefined patterns. Cannot be added by users. + #[serde(skip_deserializing, rename = "event_match")] + EventMatchType(EventMatchTypeCondition), #[serde(rename = "com.beeper.msc3758.exact_event_match")] ExactEventMatch(ExactEventMatchCondition), #[serde(rename = "im.nheko.msc3664.related_event_match")] RelatedEventMatch(RelatedEventMatchCondition), + // Identical to related_event_match but gives predefined patterns. Cannot be added by users. + #[serde(skip_deserializing, rename = "im.nheko.msc3664.related_event_match")] + RelatedEventMatchType(RelatedEventMatchTypeCondition), #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")] ExactEventPropertyContains(ExactEventMatchCondition), - #[serde(rename = "org.matrix.msc3952.is_user_mention")] - IsUserMention, + // Identical to exact_event_property_contains but gives predefined patterns. Cannot be added by users. + #[serde( + skip_deserializing, + rename = "org.matrix.msc3966.exact_event_property_contains" + )] + ExactEventPropertyContainsType(ExactEventMatchTypeCondition), ContainsDisplayName, RoomMemberCount { #[serde(skip_serializing_if = "Option::is_none")] @@ -362,14 +372,27 @@ impl<'source> FromPyObject<'source> for Condition { } } -/// The body of a [`Condition::EventMatch`] +/// The body of a [`Condition::EventMatch`] with a pattern. #[derive(Serialize, Deserialize, Debug, Clone)] pub struct EventMatchCondition { pub key: Cow<'static, str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub pattern: Option<Cow<'static, str>>, - #[serde(skip_serializing_if = "Option::is_none")] - pub pattern_type: Option<Cow<'static, str>>, + pub pattern: Cow<'static, str>, +} + +#[derive(Serialize, Debug, Clone)] +#[serde(rename_all = "snake_case")] +pub enum EventMatchPatternType { + UserId, + UserLocalpart, +} + +/// The body of a [`Condition::EventMatch`] that uses user_id or user_localpart as a pattern. +#[derive(Serialize, Debug, Clone)] +pub struct EventMatchTypeCondition { + pub key: Cow<'static, str>, + // During serialization, the pattern_type property gets replaced with a + // pattern property of the correct value in synapse.push.clientformat.format_push_rules_for_user. + pub pattern_type: Cow<'static, EventMatchPatternType>, } /// The body of a [`Condition::ExactEventMatch`] @@ -379,6 +402,15 @@ pub struct ExactEventMatchCondition { pub value: Cow<'static, SimpleJsonValue>, } +/// The body of a [`Condition::ExactEventMatch`] that uses user_id or user_localpart as a pattern. +#[derive(Serialize, Debug, Clone)] +pub struct ExactEventMatchTypeCondition { + pub key: Cow<'static, str>, + // During serialization, the pattern_type property gets replaced with a + // pattern property of the correct value in synapse.push.clientformat.format_push_rules_for_user. + pub value_type: Cow<'static, EventMatchPatternType>, +} + /// The body of a [`Condition::RelatedEventMatch`] #[derive(Serialize, Deserialize, Debug, Clone)] pub struct RelatedEventMatchCondition { @@ -386,8 +418,18 @@ pub struct RelatedEventMatchCondition { pub key: Option<Cow<'static, str>>, #[serde(skip_serializing_if = "Option::is_none")] pub pattern: Option<Cow<'static, str>>, + pub rel_type: Cow<'static, str>, #[serde(skip_serializing_if = "Option::is_none")] - pub pattern_type: Option<Cow<'static, str>>, + pub include_fallbacks: Option<bool>, +} + +/// The body of a [`Condition::RelatedEventMatch`] that uses user_id or user_localpart as a pattern. +#[derive(Serialize, Debug, Clone)] +pub struct RelatedEventMatchTypeCondition { + // This is only used if pattern_type exists (and thus key must exist), so is + // a bit simpler than RelatedEventMatchCondition. + pub key: Cow<'static, str>, + pub pattern_type: Cow<'static, EventMatchPatternType>, pub rel_type: Cow<'static, str>, #[serde(skip_serializing_if = "Option::is_none")] pub include_fallbacks: Option<bool>, @@ -571,8 +613,7 @@ impl FilteredPushRules { fn test_serialize_condition() { let condition = Condition::Known(KnownCondition::EventMatch(EventMatchCondition { key: "content.body".into(), - pattern: Some("coffee".into()), - pattern_type: None, + pattern: "coffee".into(), })); let json = serde_json::to_string(&condition).unwrap(); @@ -586,7 +627,33 @@ fn test_serialize_condition() { fn test_deserialize_condition() { let json = r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"#; - let _: Condition = serde_json::from_str(json).unwrap(); + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::EventMatch(_)) + )); +} + +#[test] +fn test_serialize_event_match_condition_with_pattern_type() { + let condition = Condition::Known(KnownCondition::EventMatchType(EventMatchTypeCondition { + key: "content.body".into(), + pattern_type: Cow::Owned(EventMatchPatternType::UserId), + })); + + let json = serde_json::to_string(&condition).unwrap(); + assert_eq!( + json, + r#"{"kind":"event_match","key":"content.body","pattern_type":"user_id"}"# + ) +} + +#[test] +fn test_cannot_deserialize_event_match_condition_with_pattern_type() { + let json = r#"{"kind":"event_match","key":"content.body","pattern_type":"user_id"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!(condition, Condition::Unknown(_))); } #[test] @@ -601,6 +668,37 @@ fn test_deserialize_unstable_msc3664_condition() { } #[test] +fn test_serialize_unstable_msc3664_condition_with_pattern_type() { + let condition = Condition::Known(KnownCondition::RelatedEventMatchType( + RelatedEventMatchTypeCondition { + key: "content.body".into(), + pattern_type: Cow::Owned(EventMatchPatternType::UserId), + rel_type: "m.in_reply_to".into(), + include_fallbacks: Some(true), + }, + )); + + let json = serde_json::to_string(&condition).unwrap(); + assert_eq!( + json, + r#"{"kind":"im.nheko.msc3664.related_event_match","key":"content.body","pattern_type":"user_id","rel_type":"m.in_reply_to","include_fallbacks":true}"# + ) +} + +#[test] +fn test_cannot_deserialize_unstable_msc3664_condition_with_pattern_type() { + let json = r#"{"kind":"im.nheko.msc3664.related_event_match","key":"content.body","pattern_type":"user_id","rel_type":"m.in_reply_to"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + // Since pattern is optional on RelatedEventMatch it deserializes it to that + // instead of RelatedEventMatchType. + assert!(matches!( + condition, + Condition::Known(KnownCondition::RelatedEventMatch(_)) + )); +} + +#[test] fn test_deserialize_unstable_msc3931_condition() { let json = r#"{"kind":"org.matrix.msc3931.room_version_supports","feature":"org.example.feature"}"#; @@ -655,17 +753,6 @@ fn test_deserialize_unstable_msc3758_condition() { } #[test] -fn test_deserialize_unstable_msc3952_user_condition() { - let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#; - - let condition: Condition = serde_json::from_str(json).unwrap(); - assert!(matches!( - condition, - Condition::Known(KnownCondition::IsUserMention) - )); -} - -#[test] fn test_deserialize_custom_condition() { let json = r#"{"kind":"custom_tag"}"#; diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index a8f0ed2435..c17796ffbd 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union +from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Tuple, Union from synapse.types import JsonDict, JsonValue @@ -58,7 +58,6 @@ class PushRuleEvaluator: self, flattened_keys: Mapping[str, JsonValue], has_mentions: bool, - user_mentions: Set[str], room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], diff --git a/synapse/__init__.py b/synapse/__init__.py index fbfd506a43..a203ed533a 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -1,5 +1,6 @@ # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018-9 New Vector Ltd +# Copyright 2018-2019 New Vector Ltd +# Copyright 2023 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" This is a reference implementation of a Matrix homeserver. +""" This is an implementation of a Matrix homeserver. """ import json diff --git a/synapse/_scripts/move_remote_media_to_new_store.py b/synapse/_scripts/move_remote_media_to_new_store.py index 819afaaca6..0dd36bee20 100755 --- a/synapse/_scripts/move_remote_media_to_new_store.py +++ b/synapse/_scripts/move_remote_media_to_new_store.py @@ -37,7 +37,7 @@ import os import shutil import sys -from synapse.rest.media.v1.filepath import MediaFilePaths +from synapse.media.filepath import MediaFilePaths logger = logging.getLogger() diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index bc38fae0b6..fc64f2bda1 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -179,10 +179,16 @@ class ExperimentalConfig(Config): "msc3873_escape_event_match_key", False ) - # MSC3952: Intentional mentions, this depends on MSC3758. + # MSC3966: exact_event_property_contains push rule condition. + self.msc3966_exact_event_property_contains = experimental.get( + "msc3966_exact_event_property_contains", False + ) + + # MSC3952: Intentional mentions, this depends on MSC3758 and MSC3966. self.msc3952_intentional_mentions = ( experimental.get("msc3952_intentional_mentions", False) and self.msc3758_exact_event_match + and self.msc3966_exact_event_property_contains ) # MSC3959: Do not generate notifications for edits. @@ -194,3 +200,6 @@ class ExperimentalConfig(Config): self.msc3966_exact_event_property_contains = experimental.get( "msc3966_exact_event_property_contains", False ) + + # MSC3967: Do not require UIA when first uploading cross signing keys + self.msc3967_enabled = experimental.get("msc3967_enabled", False) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2da40c09f0..ecb3edbe3a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -178,11 +178,13 @@ class ContentRepositoryConfig(Config): for i, provider_config in enumerate(storage_providers): # We special case the module "file_system" so as not to need to # expose FileStorageProviderBackend - if provider_config["module"] == "file_system": - provider_config["module"] = ( - "synapse.rest.media.v1.storage_provider" - ".FileStorageProviderBackend" - ) + if ( + provider_config["module"] == "file_system" + or provider_config["module"] == "synapse.rest.media.v1.storage_provider" + ): + provider_config[ + "module" + ] = "synapse.media.storage_provider.FileStorageProviderBackend" provider_class, parsed_config = load_module( provider_config, ("media_storage_providers", "<item %i>" % i) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e0d82ad81c..a91a5d1e3c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -23,6 +23,7 @@ from synapse.types import JsonDict, StateMap if TYPE_CHECKING: from synapse.storage.controllers import StorageControllers + from synapse.storage.databases import StateGroupDataStore from synapse.storage.databases.main import DataStore from synapse.types.state import StateFilter @@ -348,6 +349,54 @@ class UnpersistedEventContext(UnpersistedEventContextBase): partial_state: bool state_map_before_event: Optional[StateMap[str]] = None + @classmethod + async def batch_persist_unpersisted_contexts( + cls, + events_and_context: List[Tuple[EventBase, "UnpersistedEventContextBase"]], + room_id: str, + last_known_state_group: int, + datastore: "StateGroupDataStore", + ) -> List[Tuple[EventBase, EventContext]]: + """ + Takes a list of events and their associated unpersisted contexts and persists + the unpersisted contexts, returning a list of events and persisted contexts. + Note that all the events must be in a linear chain (ie a <- b <- c). + + Args: + events_and_context: A list of events and their unpersisted contexts + room_id: the room_id for the events + last_known_state_group: the last persisted state group + datastore: a state datastore + """ + amended_events_and_context = await datastore.store_state_deltas_for_batched( + events_and_context, room_id, last_known_state_group + ) + + events_and_persisted_context = [] + for event, unpersisted_context in amended_events_and_context: + if event.is_state(): + context = EventContext( + storage=unpersisted_context._storage, + state_group=unpersisted_context.state_group_after_event, + state_group_before_event=unpersisted_context.state_group_before_event, + state_delta_due_to_event=unpersisted_context.state_delta_due_to_event, + partial_state=unpersisted_context.partial_state, + prev_group=unpersisted_context.state_group_before_event, + delta_ids=unpersisted_context.state_delta_due_to_event, + ) + else: + context = EventContext( + storage=unpersisted_context._storage, + state_group=unpersisted_context.state_group_after_event, + state_group_before_event=unpersisted_context.state_group_before_event, + state_delta_due_to_event=unpersisted_context.state_delta_due_to_event, + partial_state=unpersisted_context.partial_state, + prev_group=unpersisted_context.prev_group_for_state_group_before_event, + delta_ids=unpersisted_context.delta_ids_to_state_group_before_event, + ) + events_and_persisted_context.append((event, context)) + return events_and_persisted_context + async def get_prev_state_ids( self, state_filter: Optional["StateFilter"] = None ) -> StateMap[str]: diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 623a2c71ea..765c15bb51 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -33,8 +33,8 @@ from typing_extensions import Literal import synapse from synapse.api.errors import Codes from synapse.logging.opentracing import trace -from synapse.rest.media.v1._base import FileInfo -from synapse.rest.media.v1.media_storage import ReadableFileWrapper +from synapse.media._base import FileInfo +from synapse.media.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour from synapse.types import JsonDict, RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 9a25ed419b..3e4d52c8d8 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -45,6 +45,8 @@ CHECK_CAN_DEACTIVATE_USER_CALLBACK = Callable[[str, bool], Awaitable[bool]] ON_PROFILE_UPDATE_CALLBACK = Callable[[str, ProfileInfo, bool, bool], Awaitable] ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK = Callable[[str, bool, bool], Awaitable] ON_THREEPID_BIND_CALLBACK = Callable[[str, str, str], Awaitable] +ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable] +ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable] def load_legacy_third_party_event_rules(hs: "HomeServer") -> None: @@ -172,6 +174,12 @@ class ThirdPartyEventRules: ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK ] = [] self._on_threepid_bind_callbacks: List[ON_THREEPID_BIND_CALLBACK] = [] + self._on_add_user_third_party_identifier_callbacks: List[ + ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK + ] = [] + self._on_remove_user_third_party_identifier_callbacks: List[ + ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK + ] = [] def register_third_party_rules_callbacks( self, @@ -191,6 +199,12 @@ class ThirdPartyEventRules: ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK ] = None, on_threepid_bind: Optional[ON_THREEPID_BIND_CALLBACK] = None, + on_add_user_third_party_identifier: Optional[ + ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK + ] = None, + on_remove_user_third_party_identifier: Optional[ + ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK + ] = None, ) -> None: """Register callbacks from modules for each hook.""" if check_event_allowed is not None: @@ -228,6 +242,11 @@ class ThirdPartyEventRules: if on_threepid_bind is not None: self._on_threepid_bind_callbacks.append(on_threepid_bind) + if on_add_user_third_party_identifier is not None: + self._on_add_user_third_party_identifier_callbacks.append( + on_add_user_third_party_identifier + ) + async def check_event_allowed( self, event: EventBase, @@ -511,6 +530,9 @@ class ThirdPartyEventRules: local homeserver, not when it's created on an identity server (and then kept track of so that it can be unbound on the same IS later on). + THIS MODULE CALLBACK METHOD HAS BEEN DEPRECATED. Please use the + `on_add_user_third_party_identifier` callback method instead. + Args: user_id: the user being associated with the threepid. medium: the threepid's medium. @@ -523,3 +545,44 @@ class ThirdPartyEventRules: logger.exception( "Failed to run module API callback %s: %s", callback, e ) + + async def on_add_user_third_party_identifier( + self, user_id: str, medium: str, address: str + ) -> None: + """Called when an association between a user's Matrix ID and a third-party ID + (email, phone number) has successfully been registered on the homeserver. + + Args: + user_id: The User ID included in the association. + medium: The medium of the third-party ID (email, msisdn). + address: The address of the third-party ID (i.e. an email address). + """ + for callback in self._on_add_user_third_party_identifier_callbacks: + try: + await callback(user_id, medium, address) + except Exception as e: + logger.exception( + "Failed to run module API callback %s: %s", callback, e + ) + + async def on_remove_user_third_party_identifier( + self, user_id: str, medium: str, address: str + ) -> None: + """Called when an association between a user's Matrix ID and a third-party ID + (email, phone number) has been successfully removed on the homeserver. + + This is called *after* any known bindings on identity servers for this + association have been removed. + + Args: + user_id: The User ID included in the removed association. + medium: The medium of the third-party ID (email, msisdn). + address: The address of the third-party ID (i.e. an email address). + """ + for callback in self._on_remove_user_third_party_identifier_callbacks: + try: + await callback(user_id, medium, address) + except Exception as e: + logger.exception( + "Failed to run module API callback %s: %s", callback, e + ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index ebf8c7ed83..eaa6cad4af 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -516,11 +516,6 @@ class EventClientSerializer: # being serialized. serialized_aggregations = {} - if event_aggregations.annotations: - serialized_aggregations[ - RelationTypes.ANNOTATION - ] = event_aggregations.annotations - if event_aggregations.references: serialized_aggregations[ RelationTypes.REFERENCE diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index 797de46dbc..7e01c18c6c 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -155,9 +155,6 @@ class AccountDataHandler: max_stream_id = await self._store.remove_account_data_for_room( user_id, room_id, account_data_type ) - if max_stream_id is None: - # The referenced account data did not exist, so no delete occurred. - return None self._notifier.on_new_event( StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] @@ -230,9 +227,6 @@ class AccountDataHandler: max_stream_id = await self._store.remove_account_data_for_user( user_id, account_data_type ) - if max_stream_id is None: - # The referenced account data did not exist, so no delete occurred. - return None self._notifier.on_new_event( StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] @@ -248,7 +242,6 @@ class AccountDataHandler: instance_name=random.choice(self._account_data_writers), user_id=user_id, account_data_type=account_data_type, - content={}, ) return response["max_stream_id"] diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index b12bc4c9a3..308e38edea 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1542,6 +1542,17 @@ class AuthHandler: async def add_threepid( self, user_id: str, medium: str, address: str, validated_at: int ) -> None: + """ + Adds an association between a user's Matrix ID and a third-party ID (email, + phone number). + + Args: + user_id: The ID of the user to associate. + medium: The medium of the third-party ID (email, msisdn). + address: The address of the third-party ID (i.e. an email address). + validated_at: The timestamp in ms of when the validation that the user owns + this third-party ID occurred. + """ # check if medium has a valid value if medium not in ["email", "msisdn"]: raise SynapseError( @@ -1566,42 +1577,44 @@ class AuthHandler: user_id, medium, address, validated_at, self.hs.get_clock().time_msec() ) + # Inform Synapse modules that a 3PID association has been created. + await self._third_party_rules.on_add_user_third_party_identifier( + user_id, medium, address + ) + + # Deprecated method for informing Synapse modules that a 3PID association + # has successfully been created. await self._third_party_rules.on_threepid_bind(user_id, medium, address) - async def delete_threepid( - self, user_id: str, medium: str, address: str, id_server: Optional[str] = None - ) -> bool: - """Attempts to unbind the 3pid on the identity servers and deletes it - from the local database. + async def delete_local_threepid( + self, user_id: str, medium: str, address: str + ) -> None: + """Deletes an association between a third-party ID and a user ID from the local + database. This method does not unbind the association from any identity servers. + + If `medium` is 'email' and a pusher is associated with this third-party ID, the + pusher will also be deleted. Args: user_id: ID of user to remove the 3pid from. medium: The medium of the 3pid being removed: "email" or "msisdn". address: The 3pid address to remove. - id_server: Use the given identity server when unbinding - any threepids. If None then will attempt to unbind using the - identity server specified when binding (if known). - - Returns: - Returns True if successfully unbound the 3pid on - the identity server, False if identity server doesn't support the - unbind API. """ - # 'Canonicalise' email addresses as per above if medium == "email": address = canonicalise_email(address) - result = await self.hs.get_identity_handler().try_unbind_threepid( - user_id, medium, address, id_server + await self.store.user_delete_threepid(user_id, medium, address) + + # Inform Synapse modules that a 3PID association has been deleted. + await self._third_party_rules.on_remove_user_third_party_identifier( + user_id, medium, address ) - await self.store.user_delete_threepid(user_id, medium, address) if medium == "email": await self.store.delete_pusher_by_app_id_pushkey_user_id( app_id="m.email", pushkey=address, user_id=user_id ) - return result async def hash(self, password: str) -> str: """Computes a secure hash of password. diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index d24f649382..d31263c717 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -100,26 +100,28 @@ class DeactivateAccountHandler: # unbinding identity_server_supports_unbinding = True - # Retrieve the 3PIDs this user has bound to an identity server - threepids = await self.store.user_get_bound_threepids(user_id) - - for threepid in threepids: + # Attempt to unbind any known bound threepids to this account from identity + # server(s). + bound_threepids = await self.store.user_get_bound_threepids(user_id) + for threepid in bound_threepids: try: result = await self._identity_handler.try_unbind_threepid( user_id, threepid["medium"], threepid["address"], id_server ) - identity_server_supports_unbinding &= result except Exception: # Do we want this to be a fatal error or should we carry on? logger.exception("Failed to remove threepid from ID server") raise SynapseError(400, "Failed to remove threepid from ID server") - await self.store.user_delete_threepid( + + identity_server_supports_unbinding &= result + + # Remove any local threepid associations for this account. + local_threepids = await self.store.user_get_threepids(user_id) + for threepid in local_threepids: + await self._auth_handler.delete_local_threepid( user_id, threepid["medium"], threepid["address"] ) - # Remove all 3PIDs this user has bound to the homeserver - await self.store.user_delete_threepids(user_id) - # delete any devices belonging to the user, which will also # delete corresponding access tokens. await self._device_handler.delete_all_devices_for_user(user_id) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 43cbece21b..4e9c8d8db0 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -1301,6 +1301,20 @@ class E2eKeysHandler: return desired_key_data + async def is_cross_signing_set_up_for_user(self, user_id: str) -> bool: + """Checks if the user has cross-signing set up + + Args: + user_id: The user to check + + Returns: + True if the user has cross-signing set up, False otherwise + """ + existing_master_key = await self.store.get_e2e_cross_signing_key( + user_id, "master" + ) + return existing_master_key is not None + def _check_cross_signing_key( key: JsonDict, user_id: str, key_type: str, signing_key: Optional[VerifyKey] = None diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index aa90d0000d..e433d6b01f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -574,7 +574,7 @@ class EventCreationHandler: state_map: Optional[StateMap[str]] = None, for_batch: bool = False, current_state_group: Optional[int] = None, - ) -> Tuple[EventBase, EventContext]: + ) -> Tuple[EventBase, UnpersistedEventContextBase]: """ Given a dict from a client, create a new event. If bool for_batch is true, will create an event using the prev_event_ids, and will create an event context for @@ -721,8 +721,6 @@ class EventCreationHandler: current_state_group=current_state_group, ) - context = await unpersisted_context.persist(event) - # In an ideal world we wouldn't need the second part of this condition. However, # this behaviour isn't spec'd yet, meaning we should be able to deactivate this # behaviour. Another reason is that this code is also evaluated each time a new @@ -739,7 +737,7 @@ class EventCreationHandler: assert state_map is not None prev_event_id = state_map.get((EventTypes.Member, event.sender)) else: - prev_state_ids = await context.get_prev_state_ids( + prev_state_ids = await unpersisted_context.get_prev_state_ids( StateFilter.from_types([(EventTypes.Member, None)]) ) prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender)) @@ -764,8 +762,7 @@ class EventCreationHandler: ) self.validator.validate_new(event, self.config) - - return event, context + return event, unpersisted_context async def _is_exempt_from_privacy_policy( self, builder: EventBuilder, requester: Requester @@ -1005,7 +1002,7 @@ class EventCreationHandler: max_retries = 5 for i in range(max_retries): try: - event, context = await self.create_event( + event, unpersisted_context = await self.create_event( requester, event_dict, txn_id=txn_id, @@ -1016,6 +1013,7 @@ class EventCreationHandler: historical=historical, depth=depth, ) + context = await unpersisted_context.persist(event) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( event.sender, @@ -1190,7 +1188,6 @@ class EventCreationHandler: if for_batch: assert prev_event_ids is not None assert state_map is not None - assert current_state_group is not None auth_ids = self._event_auth_handler.compute_auth_events(builder, state_map) event = await builder.build( prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth @@ -2046,7 +2043,7 @@ class EventCreationHandler: max_retries = 5 for i in range(max_retries): try: - event, context = await self.create_event( + event, unpersisted_context = await self.create_event( requester, { "type": EventTypes.Dummy, @@ -2055,6 +2052,7 @@ class EventCreationHandler: "sender": user_id, }, ) + context = await unpersisted_context.persist(event) event.internal_metadata.proactively_send = False diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 0fb15391e0..553053b694 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -60,13 +60,12 @@ class BundledAggregations: Some values require additional processing during serialization. """ - annotations: Optional[JsonDict] = None references: Optional[JsonDict] = None replace: Optional[EventBase] = None thread: Optional[_ThreadAggregation] = None def __bool__(self) -> bool: - return bool(self.annotations or self.references or self.replace or self.thread) + return bool(self.references or self.replace or self.thread) class RelationsHandler: @@ -227,67 +226,6 @@ class RelationsHandler: e.msg, ) - async def get_annotations_for_events( - self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset() - ) -> Dict[str, List[JsonDict]]: - """Get a list of annotations to the given events, grouped by event type and - aggregation key, sorted by count. - - This is used e.g. to get the what and how many reactions have happened - on an event. - - Args: - event_ids: Fetch events that relate to these event IDs. - ignored_users: The users ignored by the requesting user. - - Returns: - A map of event IDs to a list of groups of annotations that match. - Each entry is a dict with `type`, `key` and `count` fields. - """ - # Get the base results for all users. - full_results = await self._main_store.get_aggregation_groups_for_events( - event_ids - ) - - # Avoid additional logic if there are no ignored users. - if not ignored_users: - return { - event_id: results - for event_id, results in full_results.items() - if results - } - - # Then subtract off the results for any ignored users. - ignored_results = await self._main_store.get_aggregation_groups_for_users( - [event_id for event_id, results in full_results.items() if results], - ignored_users, - ) - - filtered_results = {} - for event_id, results in full_results.items(): - # If no annotations, skip. - if not results: - continue - - # If there are not ignored results for this event, copy verbatim. - if event_id not in ignored_results: - filtered_results[event_id] = results - continue - - # Otherwise, subtract out the ignored results. - event_ignored_results = ignored_results[event_id] - for result in results: - key = (result["type"], result["key"]) - if key in event_ignored_results: - # Ensure to not modify the cache. - result = result.copy() - result["count"] -= event_ignored_results[key] - if result["count"] <= 0: - continue - filtered_results.setdefault(event_id, []).append(result) - - return filtered_results - async def get_references_for_events( self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset() ) -> Dict[str, List[_RelatedEvent]]: @@ -531,17 +469,6 @@ class RelationsHandler: # (as that is what makes it part of the thread). relations_by_id[latest_thread_event.event_id] = RelationTypes.THREAD - async def _fetch_annotations() -> None: - """Fetch any annotations (ie, reactions) to bundle with this event.""" - annotations_by_event_id = await self.get_annotations_for_events( - events_by_id.keys(), ignored_users=ignored_users - ) - for event_id, annotations in annotations_by_event_id.items(): - if annotations: - results.setdefault(event_id, BundledAggregations()).annotations = { - "chunk": annotations - } - async def _fetch_references() -> None: """Fetch any references to bundle with this event.""" references_by_event_id = await self.get_references_for_events( @@ -575,7 +502,6 @@ class RelationsHandler: await make_deferred_yieldable( gather_results( ( - run_in_background(_fetch_annotations), run_in_background(_fetch_references), run_in_background(_fetch_edits), ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a26ec02284..b1784638f4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -51,6 +51,7 @@ from synapse.api.filtering import Filter from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase +from synapse.events.snapshot import UnpersistedEventContext from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM @@ -211,7 +212,7 @@ class RoomCreationHandler: # the required power level to send the tombstone event. ( tombstone_event, - tombstone_context, + tombstone_unpersisted_context, ) = await self.event_creation_handler.create_event( requester, { @@ -225,6 +226,9 @@ class RoomCreationHandler: }, }, ) + tombstone_context = await tombstone_unpersisted_context.persist( + tombstone_event + ) validate_event_for_room_version(tombstone_event) await self._event_auth_handler.check_auth_rules_from_context( tombstone_event @@ -1092,7 +1096,7 @@ class RoomCreationHandler: content: JsonDict, for_batch: bool, **kwargs: Any, - ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]: + ) -> Tuple[EventBase, synapse.events.snapshot.UnpersistedEventContextBase]: """ Creates an event and associated event context. Args: @@ -1111,20 +1115,23 @@ class RoomCreationHandler: event_dict = create_event_dict(etype, content, **kwargs) - new_event, new_context = await self.event_creation_handler.create_event( + ( + new_event, + new_unpersisted_context, + ) = await self.event_creation_handler.create_event( creator, event_dict, prev_event_ids=prev_event, depth=depth, state_map=state_map, for_batch=for_batch, - current_state_group=current_state_group, ) + depth += 1 prev_event = [new_event.event_id] state_map[(new_event.type, new_event.state_key)] = new_event.event_id - return new_event, new_context + return new_event, new_unpersisted_context try: config = self._presets_dict[preset_config] @@ -1134,10 +1141,10 @@ class RoomCreationHandler: ) creation_content.update({"creator": creator_id}) - creation_event, creation_context = await create_event( + creation_event, unpersisted_creation_context = await create_event( EventTypes.Create, creation_content, False ) - + creation_context = await unpersisted_creation_context.persist(creation_event) logger.debug("Sending %s in new room", EventTypes.Member) ev = await self.event_creation_handler.handle_new_client_event( requester=creator, @@ -1181,7 +1188,6 @@ class RoomCreationHandler: power_event, power_context = await create_event( EventTypes.PowerLevels, pl_content, True ) - current_state_group = power_context._state_group events_to_send.append((power_event, power_context)) else: power_level_content: JsonDict = { @@ -1230,14 +1236,12 @@ class RoomCreationHandler: power_level_content, True, ) - current_state_group = pl_context._state_group events_to_send.append((pl_event, pl_context)) if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state: room_alias_event, room_alias_context = await create_event( EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True ) - current_state_group = room_alias_context._state_group events_to_send.append((room_alias_event, room_alias_context)) if (EventTypes.JoinRules, "") not in initial_state: @@ -1246,7 +1250,6 @@ class RoomCreationHandler: {"join_rule": config["join_rules"]}, True, ) - current_state_group = join_rules_context._state_group events_to_send.append((join_rules_event, join_rules_context)) if (EventTypes.RoomHistoryVisibility, "") not in initial_state: @@ -1255,7 +1258,6 @@ class RoomCreationHandler: {"history_visibility": config["history_visibility"]}, True, ) - current_state_group = visibility_context._state_group events_to_send.append((visibility_event, visibility_context)) if config["guest_can_join"]: @@ -1265,14 +1267,12 @@ class RoomCreationHandler: {EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN}, True, ) - current_state_group = guest_access_context._state_group events_to_send.append((guest_access_event, guest_access_context)) for (etype, state_key), content in initial_state.items(): event, context = await create_event( etype, content, True, state_key=state_key ) - current_state_group = context._state_group events_to_send.append((event, context)) if config["encrypted"]: @@ -1284,9 +1284,16 @@ class RoomCreationHandler: ) events_to_send.append((encryption_event, encryption_context)) + datastore = self.hs.get_datastores().state + events_and_context = ( + await UnpersistedEventContext.batch_persist_unpersisted_contexts( + events_to_send, room_id, current_state_group, datastore + ) + ) + last_event = await self.event_creation_handler.handle_new_client_event( creator, - events_to_send, + events_and_context, ignore_shadow_ban=True, ratelimit=False, ) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 5d4ca0e2d2..bf9df60218 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -327,7 +327,7 @@ class RoomBatchHandler: # Mark all events as historical event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True - event, context = await self.event_creation_handler.create_event( + event, unpersisted_context = await self.event_creation_handler.create_event( await self.create_requester_for_user_id_from_app_service( ev["sender"], app_service_requester.app_service ), @@ -345,7 +345,7 @@ class RoomBatchHandler: historical=True, depth=inherited_depth, ) - + context = await unpersisted_context.persist(event) assert context._state_group # Normally this is done when persisting the event but we have to diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a965c7ec76..509c557889 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -207,6 +207,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): @abc.abstractmethod async def remote_knock( self, + requester: Requester, remote_room_hosts: List[str], room_id: str, user: UserID, @@ -414,7 +415,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): max_retries = 5 for i in range(max_retries): try: - event, context = await self.event_creation_handler.create_event( + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_event( requester, { "type": EventTypes.Member, @@ -435,7 +439,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): outlier=outlier, historical=historical, ) - + context = await unpersisted_context.persist(event) prev_state_ids = await context.get_prev_state_ids( StateFilter.from_types([(EventTypes.Member, None)]) ) @@ -1070,7 +1074,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) return await self.remote_knock( - remote_room_hosts, room_id, target, content + requester, remote_room_hosts, room_id, target, content ) return await self._local_membership_update( @@ -1944,7 +1948,10 @@ class RoomMemberMasterHandler(RoomMemberHandler): max_retries = 5 for i in range(max_retries): try: - event, context = await self.event_creation_handler.create_event( + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_event( requester, event_dict, txn_id=txn_id, @@ -1952,6 +1959,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): auth_event_ids=auth_event_ids, outlier=True, ) + context = await unpersisted_context.persist(event) event.internal_metadata.out_of_band_membership = True result_event = ( @@ -1977,6 +1985,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): async def remote_knock( self, + requester: Requester, remote_room_hosts: List[str], room_id: str, user: UserID, diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py index ba261702d4..76e36b8a6d 100644 --- a/synapse/handlers/room_member_worker.py +++ b/synapse/handlers/room_member_worker.py @@ -113,6 +113,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): async def remote_knock( self, + requester: Requester, remote_room_hosts: List[str], room_id: str, user: UserID, @@ -123,9 +124,10 @@ class RoomMemberWorkerHandler(RoomMemberHandler): Implements RoomMemberHandler.remote_knock """ ret = await self._remote_knock_client( + requester=requester, remote_room_hosts=remote_room_hosts, room_id=room_id, - user=user, + user_id=user.to_string(), content=content, ) return ret["event_id"], ret["stream_id"] diff --git a/synapse/http/client.py b/synapse/http/client.py index a05f297933..ae48e7c3f0 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -44,6 +44,7 @@ from twisted.internet.interfaces import ( IAddress, IDelayedCall, IHostResolution, + IOpenSSLContextFactory, IReactorCore, IReactorPluggableNameResolver, IReactorTime, @@ -958,8 +959,8 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory): self._context = SSL.Context(SSL.SSLv23_METHOD) self._context.set_verify(VERIFY_NONE, lambda *_: False) - def getContext(self, hostname=None, port=None): + def getContext(self) -> SSL.Context: return self._context - def creatorForNetloc(self, hostname: bytes, port: int): + def creatorForNetloc(self, hostname: bytes, port: int) -> IOpenSSLContextFactory: return self diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 312aab4dcc..3302d4e48a 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -440,7 +440,7 @@ class MatrixFederationHttpClient: Args: request: details of request to be sent - retry_on_dns_fail: true if the request should be retied on DNS failures + retry_on_dns_fail: true if the request should be retried on DNS failures timeout: number of milliseconds to wait for the response headers (including connecting to the server), *for each attempt*. @@ -475,7 +475,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -871,7 +871,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -958,7 +958,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -1036,6 +1036,8 @@ class MatrixFederationHttpClient: args: A dictionary used to create query strings, defaults to None. + retry_on_dns_fail: true if the request should be retried on DNS failures + timeout: number of milliseconds to wait for the response. self._default_timeout (60s) by default. @@ -1063,7 +1065,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -1141,7 +1143,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -1197,7 +1199,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. diff --git a/synapse/media/_base.py b/synapse/media/_base.py new file mode 100644 index 0000000000..ef8334ae25 --- /dev/null +++ b/synapse/media/_base.py @@ -0,0 +1,479 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import urllib +from abc import ABC, abstractmethod +from types import TracebackType +from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type + +import attr + +from twisted.internet.interfaces import IConsumer +from twisted.protocols.basic import FileSender +from twisted.web.server import Request + +from synapse.api.errors import Codes, SynapseError, cs_error +from synapse.http.server import finish_request, respond_with_json +from synapse.http.site import SynapseRequest +from synapse.logging.context import make_deferred_yieldable +from synapse.util.stringutils import is_ascii, parse_and_validate_server_name + +logger = logging.getLogger(__name__) + +# list all text content types that will have the charset default to UTF-8 when +# none is given +TEXT_CONTENT_TYPES = [ + "text/css", + "text/csv", + "text/html", + "text/calendar", + "text/plain", + "text/javascript", + "application/json", + "application/ld+json", + "application/rtf", + "image/svg+xml", + "text/xml", +] + + +def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: + """Parses the server name, media ID and optional file name from the request URI + + Also performs some rough validation on the server name. + + Args: + request: The `Request`. + + Returns: + A tuple containing the parsed server name, media ID and optional file name. + + Raises: + SynapseError(404): if parsing or validation fail for any reason + """ + try: + # The type on postpath seems incorrect in Twisted 21.2.0. + postpath: List[bytes] = request.postpath # type: ignore + assert postpath + + # This allows users to append e.g. /test.png to the URL. Useful for + # clients that parse the URL to see content type. + server_name_bytes, media_id_bytes = postpath[:2] + server_name = server_name_bytes.decode("utf-8") + media_id = media_id_bytes.decode("utf8") + + # Validate the server name, raising if invalid + parse_and_validate_server_name(server_name) + + file_name = None + if len(postpath) > 2: + try: + file_name = urllib.parse.unquote(postpath[-1].decode("utf-8")) + except UnicodeDecodeError: + pass + return server_name, media_id, file_name + except Exception: + raise SynapseError( + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN + ) + + +def respond_404(request: SynapseRequest) -> None: + respond_with_json( + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, + ) + + +async def respond_with_file( + request: SynapseRequest, + media_type: str, + file_path: str, + file_size: Optional[int] = None, + upload_name: Optional[str] = None, +) -> None: + logger.debug("Responding with %r", file_path) + + if os.path.isfile(file_path): + if file_size is None: + stat = os.stat(file_path) + file_size = stat.st_size + + add_file_headers(request, media_type, file_size, upload_name) + + with open(file_path, "rb") as f: + await make_deferred_yieldable(FileSender().beginFileTransfer(f, request)) + + finish_request(request) + else: + respond_404(request) + + +def add_file_headers( + request: Request, + media_type: str, + file_size: Optional[int], + upload_name: Optional[str], +) -> None: + """Adds the correct response headers in preparation for responding with the + media. + + Args: + request + media_type: The media/content type. + file_size: Size in bytes of the media, if known. + upload_name: The name of the requested file, if any. + """ + + def _quote(x: str) -> str: + return urllib.parse.quote(x.encode("utf-8")) + + # Default to a UTF-8 charset for text content types. + # ex, uses UTF-8 for 'text/css' but not 'text/css; charset=UTF-16' + if media_type.lower() in TEXT_CONTENT_TYPES: + content_type = media_type + "; charset=UTF-8" + else: + content_type = media_type + + request.setHeader(b"Content-Type", content_type.encode("UTF-8")) + if upload_name: + # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. + # + # `filename` is defined to be a `value`, which is defined by RFC2616 + # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` + # is (essentially) a single US-ASCII word, and a `quoted-string` is a + # US-ASCII string surrounded by double-quotes, using backslash as an + # escape character. Note that %-encoding is *not* permitted. + # + # `filename*` is defined to be an `ext-value`, which is defined in + # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, + # where `value-chars` is essentially a %-encoded string in the given charset. + # + # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 + # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 + # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 + + # We avoid the quoted-string version of `filename`, because (a) synapse didn't + # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we + # may as well just do the filename* version. + if _can_encode_filename_as_token(upload_name): + disposition = "inline; filename=%s" % (upload_name,) + else: + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + + request.setHeader(b"Content-Disposition", disposition.encode("ascii")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + if file_size is not None: + request.setHeader(b"Content-Length", b"%d" % (file_size,)) + + # Tell web crawlers to not index, archive, or follow links in media. This + # should help to prevent things in the media repo from showing up in web + # search results. + request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex") + + +# separators as defined in RFC2616. SP and HT are handled separately. +# see _can_encode_filename_as_token. +_FILENAME_SEPARATOR_CHARS = { + "(", + ")", + "<", + ">", + "@", + ",", + ";", + ":", + "\\", + '"', + "/", + "[", + "]", + "?", + "=", + "{", + "}", +} + + +def _can_encode_filename_as_token(x: str) -> bool: + for c in x: + # from RFC2616: + # + # token = 1*<any CHAR except CTLs or separators> + # + # separators = "(" | ")" | "<" | ">" | "@" + # | "," | ";" | ":" | "\" | <"> + # | "/" | "[" | "]" | "?" | "=" + # | "{" | "}" | SP | HT + # + # CHAR = <any US-ASCII character (octets 0 - 127)> + # + # CTL = <any US-ASCII control character + # (octets 0 - 31) and DEL (127)> + # + if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: + return False + return True + + +async def respond_with_responder( + request: SynapseRequest, + responder: "Optional[Responder]", + media_type: str, + file_size: Optional[int], + upload_name: Optional[str] = None, +) -> None: + """Responds to the request with given responder. If responder is None then + returns 404. + + Args: + request + responder + media_type: The media/content type. + file_size: Size in bytes of the media. If not known it should be None + upload_name: The name of the requested file, if any. + """ + if not responder: + respond_404(request) + return + + # If we have a responder we *must* use it as a context manager. + with responder: + if request._disconnected: + logger.warning( + "Not sending response to request %s, already disconnected.", request + ) + return + + logger.debug("Responding to media request with responder %s", responder) + add_file_headers(request, media_type, file_size, upload_name) + try: + await responder.write_to_consumer(request) + except Exception as e: + # The majority of the time this will be due to the client having gone + # away. Unfortunately, Twisted simply throws a generic exception at us + # in that case. + logger.warning("Failed to write to consumer: %s %s", type(e), e) + + # Unregister the producer, if it has one, so Twisted doesn't complain + if request.producer: + request.unregisterProducer() + + finish_request(request) + + +class Responder(ABC): + """Represents a response that can be streamed to the requester. + + Responder is a context manager which *must* be used, so that any resources + held can be cleaned up. + """ + + @abstractmethod + def write_to_consumer(self, consumer: IConsumer) -> Awaitable: + """Stream response into consumer + + Args: + consumer: The consumer to stream into. + + Returns: + Resolves once the response has finished being written + """ + raise NotImplementedError() + + def __enter__(self) -> None: # noqa: B027 + pass + + def __exit__( # noqa: B027 + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + pass + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThumbnailInfo: + """Details about a generated thumbnail.""" + + width: int + height: int + method: str + # Content type of thumbnail, e.g. image/png + type: str + # The size of the media file, in bytes. + length: Optional[int] = None + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class FileInfo: + """Details about a requested/uploaded file.""" + + # The server name where the media originated from, or None if local. + server_name: Optional[str] + # The local ID of the file. For local files this is the same as the media_id + file_id: str + # If the file is for the url preview cache + url_cache: bool = False + # Whether the file is a thumbnail or not. + thumbnail: Optional[ThumbnailInfo] = None + + # The below properties exist to maintain compatibility with third-party modules. + @property + def thumbnail_width(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.width + + @property + def thumbnail_height(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.height + + @property + def thumbnail_method(self) -> Optional[str]: + if not self.thumbnail: + return None + return self.thumbnail.method + + @property + def thumbnail_type(self) -> Optional[str]: + if not self.thumbnail: + return None + return self.thumbnail.type + + @property + def thumbnail_length(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.length + + +def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]: + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers: The HTTP request headers. + + Returns: + The filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b""]) + + # No header, bail out. + if not content_disposition[0]: + return None + + _, params = _parse_header(content_disposition[0]) + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get(b"filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. + upload_name = urllib.parse.unquote( + upload_name_utf8.decode("ascii"), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get(b"filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii.decode("ascii") + + # This may be None here, indicating we did not find a matching name. + return upload_name + + +def _parse_header(line: bytes) -> Tuple[bytes, Dict[bytes, bytes]]: + """Parse a Content-type like header. + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + line: header to be parsed + + Returns: + The main content-type, followed by the parameter dictionary + """ + parts = _parseparam(b";" + line) + key = next(parts) + pdict = {} + for p in parts: + i = p.find(b"=") + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1 :].strip() + + # strip double-quotes + if len(value) >= 2 and value[0:1] == value[-1:] == b'"': + value = value[1:-1] + value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"') + pdict[name] = value + + return key, pdict + + +def _parseparam(s: bytes) -> Generator[bytes, None, None]: + """Generator which splits the input on ;, respecting double-quoted sequences + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + s: header to be parsed + + Returns: + The split input + """ + while s[:1] == b";": + s = s[1:] + + # look for the next ; + end = s.find(b";") + + # if there is an odd number of " marks between here and the next ;, skip to the + # next ; instead + while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: + end = s.find(b";", end + 1) + + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] diff --git a/synapse/rest/media/v1/filepath.py b/synapse/media/filepath.py index 1f6441c412..1f6441c412 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/media/filepath.py diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/media/media_repository.py index c70e1837af..b81e3c2b0c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/media/media_repository.py @@ -32,18 +32,10 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) -from synapse.config._base import ConfigError from synapse.config.repository import ThumbnailRequirement -from synapse.http.server import UnrecognizedRequestResource from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import UserID -from synapse.util.async_helpers import Linearizer -from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import random_string - -from ._base import ( +from synapse.media._base import ( FileInfo, Responder, ThumbnailInfo, @@ -51,15 +43,15 @@ from ._base import ( respond_404, respond_with_responder, ) -from .config_resource import MediaConfigResource -from .download_resource import DownloadResource -from .filepath import MediaFilePaths -from .media_storage import MediaStorage -from .preview_url_resource import PreviewUrlResource -from .storage_provider import StorageProviderWrapper -from .thumbnail_resource import ThumbnailResource -from .thumbnailer import Thumbnailer, ThumbnailError -from .upload_resource import UploadResource +from synapse.media.filepath import MediaFilePaths +from synapse.media.media_storage import MediaStorage +from synapse.media.storage_provider import StorageProviderWrapper +from synapse.media.thumbnailer import Thumbnailer, ThumbnailError +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.types import UserID +from synapse.util.async_helpers import Linearizer +from synapse.util.retryutils import NotRetryingDestination +from synapse.util.stringutils import random_string if TYPE_CHECKING: from synapse.server import HomeServer @@ -1044,69 +1036,3 @@ class MediaRepository: removed_media.append(media_id) return removed_media, len(removed_media) - - -class MediaRepositoryResource(UnrecognizedRequestResource): - """File uploading and downloading. - - Uploads are POSTed to a resource which returns a token which is used to GET - the download:: - - => POST /_matrix/media/r0/upload HTTP/1.1 - Content-Type: <media-type> - Content-Length: <content-length> - - <media> - - <= HTTP/1.1 200 OK - Content-Type: application/json - - { "content_uri": "mxc://<server-name>/<media-id>" } - - => GET /_matrix/media/r0/download/<server-name>/<media-id> HTTP/1.1 - - <= HTTP/1.1 200 OK - Content-Type: <media-type> - Content-Disposition: attachment;filename=<upload-filename> - - <media> - - Clients can get thumbnails by supplying a desired width and height and - thumbnailing method:: - - => GET /_matrix/media/r0/thumbnail/<server_name> - /<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1 - - <= HTTP/1.1 200 OK - Content-Type: image/jpeg or image/png - - <thumbnail> - - The thumbnail methods are "crop" and "scale". "scale" tries to return an - image where either the width or the height is smaller than the requested - size. The client should then scale and letterbox the image if it needs to - fit within a given rectangle. "crop" tries to return an image where the - width and height are close to the requested size and the aspect matches - the requested size. The client should scale the image if it needs to fit - within a given rectangle. - """ - - def __init__(self, hs: "HomeServer"): - # If we're not configured to use it, raise if we somehow got here. - if not hs.config.media.can_load_media_repo: - raise ConfigError("Synapse is not configured to use a media repo.") - - super().__init__() - media_repo = hs.get_media_repository() - - self.putChild(b"upload", UploadResource(hs, media_repo)) - self.putChild(b"download", DownloadResource(hs, media_repo)) - self.putChild( - b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage) - ) - if hs.config.media.url_preview_enabled: - self.putChild( - b"preview_url", - PreviewUrlResource(hs, media_repo, media_repo.media_storage), - ) - self.putChild(b"config", MediaConfigResource(hs)) diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py new file mode 100644 index 0000000000..a7e22a91e1 --- /dev/null +++ b/synapse/media/media_storage.py @@ -0,0 +1,374 @@ +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +import logging +import os +import shutil +from types import TracebackType +from typing import ( + IO, + TYPE_CHECKING, + Any, + Awaitable, + BinaryIO, + Callable, + Generator, + Optional, + Sequence, + Tuple, + Type, +) + +import attr + +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IConsumer +from twisted.protocols.basic import FileSender + +import synapse +from synapse.api.errors import NotFoundError +from synapse.logging.context import defer_to_thread, make_deferred_yieldable +from synapse.util import Clock +from synapse.util.file_consumer import BackgroundFileConsumer + +from ._base import FileInfo, Responder +from .filepath import MediaFilePaths + +if TYPE_CHECKING: + from synapse.media.storage_provider import StorageProvider + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class MediaStorage: + """Responsible for storing/fetching files from local sources. + + Args: + hs + local_media_directory: Base path where we store media on disk + filepaths + storage_providers: List of StorageProvider that are used to fetch and store files. + """ + + def __init__( + self, + hs: "HomeServer", + local_media_directory: str, + filepaths: MediaFilePaths, + storage_providers: Sequence["StorageProvider"], + ): + self.hs = hs + self.reactor = hs.get_reactor() + self.local_media_directory = local_media_directory + self.filepaths = filepaths + self.storage_providers = storage_providers + self.spam_checker = hs.get_spam_checker() + self.clock = hs.get_clock() + + async def store_file(self, source: IO, file_info: FileInfo) -> str: + """Write `source` to the on disk media store, and also any other + configured storage providers + + Args: + source: A file like object that should be written + file_info: Info about the file to store + + Returns: + the file path written to in the primary media store + """ + + with self.store_into_file(file_info) as (f, fname, finish_cb): + # Write to the main repository + await self.write_to_file(source, f) + await finish_cb() + + return fname + + async def write_to_file(self, source: IO, output: IO) -> None: + """Asynchronously write the `source` to `output`.""" + await defer_to_thread(self.reactor, _write_file_synchronously, source, output) + + @contextlib.contextmanager + def store_into_file( + self, file_info: FileInfo + ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]: + """Context manager used to get a file like object to write into, as + described by file_info. + + Actually yields a 3-tuple (file, fname, finish_cb), where file is a file + like object that can be written to, fname is the absolute path of file + on disk, and finish_cb is a function that returns an awaitable. + + fname can be used to read the contents from after upload, e.g. to + generate thumbnails. + + finish_cb must be called and waited on after the file has been + successfully been written to. Should not be called if there was an + error. + + Args: + file_info: Info about the file to store + + Example: + + with media_storage.store_into_file(info) as (f, fname, finish_cb): + # .. write into f ... + await finish_cb() + """ + + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + os.makedirs(dirname, exist_ok=True) + + finished_called = [False] + + try: + with open(fname, "wb") as f: + + async def finish() -> None: + # Ensure that all writes have been flushed and close the + # file. + f.flush() + f.close() + + spam_check = await self.spam_checker.check_media_file_for_spam( + ReadableFileWrapper(self.clock, fname), file_info + ) + if spam_check != synapse.module_api.NOT_SPAM: + logger.info("Blocking media due to spam checker") + # Note that we'll delete the stored media, due to the + # try/except below. The media also won't be stored in + # the DB. + # We currently ignore any additional field returned by + # the spam-check API. + raise SpamMediaException(errcode=spam_check[0]) + + for provider in self.storage_providers: + await provider.store_file(path, file_info) + + finished_called[0] = True + + yield f, fname, finish + except Exception as e: + try: + os.remove(fname) + except Exception: + pass + + raise e from None + + if not finished_called: + raise Exception("Finished callback not called") + + async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: + """Attempts to fetch media described by file_info from the local cache + and configured storage providers. + + Args: + file_info + + Returns: + Returns a Responder if the file was found, otherwise None. + """ + paths = [self._file_info_to_path(file_info)] + + # fallback for remote thumbnails with no method in the filename + if file_info.thumbnail and file_info.server_name: + paths.append( + self.filepaths.remote_media_thumbnail_rel_legacy( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + ) + ) + + for path in paths: + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + logger.debug("responding with local file %s", local_path) + return FileResponder(open(local_path, "rb")) + logger.debug("local file %s did not exist", local_path) + + for provider in self.storage_providers: + for path in paths: + res: Any = await provider.fetch(path, file_info) + if res: + logger.debug("Streaming %s from %s", path, provider) + return res + logger.debug("%s not found on %s", path, provider) + + return None + + async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: + """Ensures that the given file is in the local cache. Attempts to + download it from storage providers if it isn't. + + Args: + file_info + + Returns: + Full path to local file + """ + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + return local_path + + # Fallback for paths without method names + # Should be removed in the future + if file_info.thumbnail and file_info.server_name: + legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + ) + legacy_local_path = os.path.join(self.local_media_directory, legacy_path) + if os.path.exists(legacy_local_path): + return legacy_local_path + + dirname = os.path.dirname(local_path) + os.makedirs(dirname, exist_ok=True) + + for provider in self.storage_providers: + res: Any = await provider.fetch(path, file_info) + if res: + with res: + consumer = BackgroundFileConsumer( + open(local_path, "wb"), self.reactor + ) + await res.write_to_consumer(consumer) + await consumer.wait() + return local_path + + raise NotFoundError() + + def _file_info_to_path(self, file_info: FileInfo) -> str: + """Converts file_info into a relative path. + + The path is suitable for storing files under a directory, e.g. used to + store files on local FS under the base media repository directory. + """ + if file_info.url_cache: + if file_info.thumbnail: + return self.filepaths.url_cache_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.url_cache_filepath_rel(file_info.file_id) + + if file_info.server_name: + if file_info.thumbnail: + return self.filepaths.remote_media_thumbnail_rel( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.remote_media_filepath_rel( + file_info.server_name, file_info.file_id + ) + + if file_info.thumbnail: + return self.filepaths.local_media_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.local_media_filepath_rel(file_info.file_id) + + +def _write_file_synchronously(source: IO, dest: IO) -> None: + """Write `source` to the file like `dest` synchronously. Should be called + from a thread. + + Args: + source: A file like object that's to be written + dest: A file like object to be written to + """ + source.seek(0) # Ensure we read from the start of the file + shutil.copyfileobj(source, dest) + + +class FileResponder(Responder): + """Wraps an open file that can be sent to a request. + + Args: + open_file: A file like object to be streamed ot the client, + is closed when finished streaming. + """ + + def __init__(self, open_file: IO): + self.open_file = open_file + + def write_to_consumer(self, consumer: IConsumer) -> Deferred: + return make_deferred_yieldable( + FileSender().beginFileTransfer(self.open_file, consumer) + ) + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + self.open_file.close() + + +class SpamMediaException(NotFoundError): + """The media was blocked by a spam checker, so we simply 404 the request (in + the same way as if it was quarantined). + """ + + +@attr.s(slots=True, auto_attribs=True) +class ReadableFileWrapper: + """Wrapper that allows reading a file in chunks, yielding to the reactor, + and writing to a callback. + + This is simplified `FileSender` that takes an IO object rather than an + `IConsumer`. + """ + + CHUNK_SIZE = 2**14 + + clock: Clock + path: str + + async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None: + """Reads the file in chunks and calls the callback with each chunk.""" + + with open(self.path, "rb") as file: + while True: + chunk = file.read(self.CHUNK_SIZE) + if not chunk: + break + + callback(chunk) + + # We yield to the reactor by sleeping for 0 seconds. + await self.clock.sleep(0) diff --git a/synapse/rest/media/v1/oembed.py b/synapse/media/oembed.py index 7592aa5d47..c0eaf04be5 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/media/oembed.py @@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, List, Optional import attr -from synapse.rest.media.v1.preview_html import parse_html_description +from synapse.media.preview_html import parse_html_description from synapse.types import JsonDict from synapse.util import json_decoder diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/media/preview_html.py index 516d0434f0..516d0434f0 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/media/preview_html.py diff --git a/synapse/media/storage_provider.py b/synapse/media/storage_provider.py new file mode 100644 index 0000000000..1c9b71d69c --- /dev/null +++ b/synapse/media/storage_provider.py @@ -0,0 +1,181 @@ +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import logging +import os +import shutil +from typing import TYPE_CHECKING, Callable, Optional + +from synapse.config._base import Config +from synapse.logging.context import defer_to_thread, run_in_background +from synapse.util.async_helpers import maybe_awaitable + +from ._base import FileInfo, Responder +from .media_storage import FileResponder + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from synapse.server import HomeServer + + +class StorageProvider(metaclass=abc.ABCMeta): + """A storage provider is a service that can store uploaded media and + retrieve them. + """ + + @abc.abstractmethod + async def store_file(self, path: str, file_info: FileInfo) -> None: + """Store the file described by file_info. The actual contents can be + retrieved by reading the file in file_info.upload_path. + + Args: + path: Relative path of file in local cache + file_info: The metadata of the file. + """ + + @abc.abstractmethod + async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: + """Attempt to fetch the file described by file_info and stream it + into writer. + + Args: + path: Relative path of file in local cache + file_info: The metadata of the file. + + Returns: + Returns a Responder if the provider has the file, otherwise returns None. + """ + + +class StorageProviderWrapper(StorageProvider): + """Wraps a storage provider and provides various config options + + Args: + backend: The storage provider to wrap. + store_local: Whether to store new local files or not. + store_synchronous: Whether to wait for file to be successfully + uploaded, or todo the upload in the background. + store_remote: Whether remote media should be uploaded + """ + + def __init__( + self, + backend: StorageProvider, + store_local: bool, + store_synchronous: bool, + store_remote: bool, + ): + self.backend = backend + self.store_local = store_local + self.store_synchronous = store_synchronous + self.store_remote = store_remote + + def __str__(self) -> str: + return "StorageProviderWrapper[%s]" % (self.backend,) + + async def store_file(self, path: str, file_info: FileInfo) -> None: + if not file_info.server_name and not self.store_local: + return None + + if file_info.server_name and not self.store_remote: + return None + + if file_info.url_cache: + # The URL preview cache is short lived and not worth offloading or + # backing up. + return None + + if self.store_synchronous: + # store_file is supposed to return an Awaitable, but guard + # against improper implementations. + await maybe_awaitable(self.backend.store_file(path, file_info)) # type: ignore + else: + # TODO: Handle errors. + async def store() -> None: + try: + return await maybe_awaitable( + self.backend.store_file(path, file_info) + ) + except Exception: + logger.exception("Error storing file") + + run_in_background(store) + + async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: + if file_info.url_cache: + # Files in the URL preview cache definitely aren't stored here, + # so avoid any potentially slow I/O or network access. + return None + + # store_file is supposed to return an Awaitable, but guard + # against improper implementations. + return await maybe_awaitable(self.backend.fetch(path, file_info)) + + +class FileStorageProviderBackend(StorageProvider): + """A storage provider that stores files in a directory on a filesystem. + + Args: + hs + config: The config returned by `parse_config`. + """ + + def __init__(self, hs: "HomeServer", config: str): + self.hs = hs + self.cache_directory = hs.config.media.media_store_path + self.base_directory = config + + def __str__(self) -> str: + return "FileStorageProviderBackend[%s]" % (self.base_directory,) + + async def store_file(self, path: str, file_info: FileInfo) -> None: + """See StorageProvider.store_file""" + + primary_fname = os.path.join(self.cache_directory, path) + backup_fname = os.path.join(self.base_directory, path) + + dirname = os.path.dirname(backup_fname) + os.makedirs(dirname, exist_ok=True) + + # mypy needs help inferring the type of the second parameter, which is generic + shutil_copyfile: Callable[[str, str], str] = shutil.copyfile + await defer_to_thread( + self.hs.get_reactor(), + shutil_copyfile, + primary_fname, + backup_fname, + ) + + async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: + """See StorageProvider.fetch""" + + backup_fname = os.path.join(self.base_directory, path) + if os.path.isfile(backup_fname): + return FileResponder(open(backup_fname, "rb")) + + return None + + @staticmethod + def parse_config(config: dict) -> str: + """Called on startup to parse config supplied. This should parse + the config and raise if there is a problem. + + The returned value is passed into the constructor. + + In this case we only care about a single param, the directory, so let's + just pull that out. + """ + return Config.ensure_directory(config["directory"]) diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/media/thumbnailer.py index f909a4fb9a..f909a4fb9a 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/media/thumbnailer.py diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 1964276a54..424239e3df 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -64,9 +64,11 @@ from synapse.events.third_party_rules import ( CHECK_EVENT_ALLOWED_CALLBACK, CHECK_THREEPID_CAN_BE_INVITED_CALLBACK, CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK, + ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK, ON_CREATE_ROOM_CALLBACK, ON_NEW_EVENT_CALLBACK, ON_PROFILE_UPDATE_CALLBACK, + ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK, ON_THREEPID_BIND_CALLBACK, ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK, ) @@ -357,6 +359,12 @@ class ModuleApi: ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK ] = None, on_threepid_bind: Optional[ON_THREEPID_BIND_CALLBACK] = None, + on_add_user_third_party_identifier: Optional[ + ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK + ] = None, + on_remove_user_third_party_identifier: Optional[ + ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK + ] = None, ) -> None: """Registers callbacks for third party event rules capabilities. @@ -373,6 +381,8 @@ class ModuleApi: on_profile_update=on_profile_update, on_user_deactivation_status_changed=on_user_deactivation_status_changed, on_threepid_bind=on_threepid_bind, + on_add_user_third_party_identifier=on_add_user_third_party_identifier, + on_remove_user_third_party_identifier=on_remove_user_third_party_identifier, ) def register_presence_router_callbacks( diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 3c4a152d6b..abcf687f05 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -23,7 +23,6 @@ from typing import ( Mapping, Optional, Sequence, - Set, Tuple, Union, ) @@ -396,18 +395,10 @@ class BulkPushRuleEvaluator: del notification_levels[key] # Pull out any user and room mentions. - mentions = event.content.get(EventContentFields.MSC3952_MENTIONS) - has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict) - user_mentions: Set[str] = set() - if has_mentions: - # mypy seems to have lost the type even though it must be a dict here. - assert isinstance(mentions, dict) - # Remove out any non-string items and convert to a set. - user_mentions_raw = mentions.get("user_ids") - if isinstance(user_mentions_raw, list): - user_mentions = set( - filter(lambda item: isinstance(item, str), user_mentions_raw) - ) + has_mentions = ( + self._intentional_mentions_enabled + and EventContentFields.MSC3952_MENTIONS in event.content + ) evaluator = PushRuleEvaluator( _flatten_dict( @@ -415,7 +406,6 @@ class BulkPushRuleEvaluator: msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key, ), has_mentions, - user_mentions, room_member_count, sender_power_level, notification_levels, diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index bb76c169c6..222afbdcc8 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -41,11 +41,12 @@ def format_push_rules_for_user( rulearray.append(template_rule) - pattern_type = template_rule.pop("pattern_type", None) - if pattern_type == "user_id": - template_rule["pattern"] = user.to_string() - elif pattern_type == "user_localpart": - template_rule["pattern"] = user.localpart + for type_key in ("pattern", "value"): + type_value = template_rule.pop(f"{type_key}_type", None) + if type_value == "user_id": + template_rule[type_key] = user.to_string() + elif type_value == "user_localpart": + template_rule[type_key] = user.localpart template_rule["enabled"] = enabled diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index 9fa1060d48..67b01db67e 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -142,17 +142,12 @@ class ReplicationRemoteKnockRestServlet(ReplicationEndpoint): } async def _handle_request( # type: ignore[override] - self, - request: SynapseRequest, - content: JsonDict, - room_id: str, - user_id: str, + self, request: SynapseRequest, content: JsonDict, room_id: str, user_id: str ) -> Tuple[int, JsonDict]: remote_room_hosts = content["remote_room_hosts"] event_content = content["content"] requester = Requester.deserialize(self.store, content["requester"]) - request.requester = requester logger.debug("remote_knock: %s on room: %s", user_id, room_id) @@ -277,16 +272,12 @@ class ReplicationRemoteRescindKnockRestServlet(ReplicationEndpoint): } async def _handle_request( # type: ignore[override] - self, - request: SynapseRequest, - content: JsonDict, - knock_event_id: str, + self, request: SynapseRequest, content: JsonDict, knock_event_id: str ) -> Tuple[int, JsonDict]: txn_id = content["txn_id"] event_content = content["content"] requester = Requester.deserialize(self.store, content["requester"]) - request.requester = requester # hopefully we're now on the master, so this won't recurse! @@ -363,3 +354,5 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ReplicationRemoteJoinRestServlet(hs).register(http_server) ReplicationRemoteRejectInviteRestServlet(hs).register(http_server) ReplicationUserJoinedLeftRoomRestServlet(hs).register(http_server) + ReplicationRemoteKnockRestServlet(hs).register(http_server) + ReplicationRemoteRescindKnockRestServlet(hs).register(http_server) diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 9d17eff714..347467d863 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -238,6 +238,24 @@ class ReplicationStreamer: except Exception: logger.exception("Failed to replicate") + # The last token we send may not match the current + # token, in which case we want to send out a `POSITION` + # to tell other workers the actual current position. + if updates[-1][0] < current_token: + logger.info( + "Sending position: %s -> %s", + stream.NAME, + current_token, + ) + self.command_handler.send_command( + PositionCommand( + stream.NAME, + self._instance_name, + updates[-1][0], + current_token, + ) + ) + logger.debug("No more pending updates, breaking poke loop") finally: self.pending_updates = False diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 14c4e6ebbb..2e19e055d3 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -108,8 +108,7 @@ class ClientRestResource(JsonResource): if is_main_process: logout.register_servlets(hs, client_resource) sync.register_servlets(hs, client_resource) - if is_main_process: - filter.register_servlets(hs, client_resource) + filter.register_servlets(hs, client_resource) account.register_servlets(hs, client_resource) register.register_servlets(hs, client_resource) if is_main_process: @@ -140,7 +139,7 @@ class ClientRestResource(JsonResource): relations.register_servlets(hs, client_resource) if is_main_process: password_policy.register_servlets(hs, client_resource) - knock.register_servlets(hs, client_resource) + knock.register_servlets(hs, client_resource) # moving to /_synapse/admin if is_main_process: diff --git a/synapse/rest/admin/event_reports.py b/synapse/rest/admin/event_reports.py index a3beb74e2c..c546ef7e23 100644 --- a/synapse/rest/admin/event_reports.py +++ b/synapse/rest/admin/event_reports.py @@ -53,11 +53,11 @@ class EventReportsRestServlet(RestServlet): PATTERNS = admin_patterns("/event_reports$") def __init__(self, hs: "HomeServer"): - self.auth = hs.get_auth() - self.store = hs.get_datastores().main + self._auth = hs.get_auth() + self._store = hs.get_datastores().main async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - await assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self._auth, request) start = parse_integer(request, "from", default=0) limit = parse_integer(request, "limit", default=100) @@ -79,7 +79,7 @@ class EventReportsRestServlet(RestServlet): errcode=Codes.INVALID_PARAM, ) - event_reports, total = await self.store.get_event_reports_paginate( + event_reports, total = await self._store.get_event_reports_paginate( start, limit, direction, user_id, room_id ) ret = {"event_reports": event_reports, "total": total} @@ -108,13 +108,13 @@ class EventReportDetailRestServlet(RestServlet): PATTERNS = admin_patterns("/event_reports/(?P<report_id>[^/]*)$") def __init__(self, hs: "HomeServer"): - self.auth = hs.get_auth() - self.store = hs.get_datastores().main + self._auth = hs.get_auth() + self._store = hs.get_datastores().main async def on_GET( self, request: SynapseRequest, report_id: str ) -> Tuple[int, JsonDict]: - await assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self._auth, request) message = ( "The report_id parameter must be a string representing a positive integer." @@ -131,8 +131,33 @@ class EventReportDetailRestServlet(RestServlet): HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM ) - ret = await self.store.get_event_report(resolved_report_id) + ret = await self._store.get_event_report(resolved_report_id) if not ret: raise NotFoundError("Event report not found") return HTTPStatus.OK, ret + + async def on_DELETE( + self, request: SynapseRequest, report_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + message = ( + "The report_id parameter must be a string representing a positive integer." + ) + try: + resolved_report_id = int(report_id) + except ValueError: + raise SynapseError( + HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM + ) + + if resolved_report_id < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM + ) + + if await self._store.delete_event_report(resolved_report_id): + return HTTPStatus.OK, {} + + raise NotFoundError("Event report not found") diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 7cc4db20d6..357e9a574d 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -304,13 +304,20 @@ class UserRestServletV2(RestServlet): # remove old threepids for medium, address in del_threepids: try: - await self.auth_handler.delete_threepid( - user_id, medium, address, None + # Attempt to remove any known bindings of this third-party ID + # and user ID from identity servers. + await self.hs.get_identity_handler().try_unbind_threepid( + user_id, medium, address, id_server=None ) except Exception: logger.exception("Failed to remove threepids") raise SynapseError(500, "Failed to remove threepids") + # Delete the local association of this user ID and third-party ID. + await self.auth_handler.delete_local_threepid( + user_id, medium, address + ) + # add new threepids current_time = self.hs.get_clock().time_msec() for medium, address in add_threepids: diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 662f5bf762..484d7440a4 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -768,7 +768,9 @@ class ThreepidDeleteRestServlet(RestServlet): user_id = requester.user.to_string() try: - ret = await self.auth_handler.delete_threepid( + # Attempt to remove any known bindings of this third-party ID + # and user ID from identity servers. + ret = await self.hs.get_identity_handler().try_unbind_threepid( user_id, body.medium, body.address, body.id_server ) except Exception: @@ -783,6 +785,11 @@ class ThreepidDeleteRestServlet(RestServlet): else: id_server_unbind_result = "no-support" + # Delete the local association of this user ID and third-party ID. + await self.auth_handler.delete_local_threepid( + user_id, body.medium, body.address + ) + return 200, {"id_server_unbind_result": id_server_unbind_result} diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py index 486c6dbbc5..dab4a77f7e 100644 --- a/synapse/rest/client/devices.py +++ b/synapse/rest/client/devices.py @@ -255,7 +255,7 @@ class DehydratedDeviceServlet(RestServlet): """ - PATTERNS = client_patterns("/org.matrix.msc2697.v2/dehydrated_device", releases=()) + PATTERNS = client_patterns("/org.matrix.msc2697.v2/dehydrated_device$", releases=()) def __init__(self, hs: "HomeServer"): super().__init__() diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index 7873b363c0..32bb8b9a91 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -312,15 +312,29 @@ class SigningKeyUploadServlet(RestServlet): user_id = requester.user.to_string() body = parse_json_object_from_request(request) - await self.auth_handler.validate_user_via_ui_auth( - requester, - request, - body, - "add a device signing key to your account", - # Allow skipping of UI auth since this is frequently called directly - # after login and it is silly to ask users to re-auth immediately. - can_skip_ui_auth=True, - ) + if self.hs.config.experimental.msc3967_enabled: + if await self.e2e_keys_handler.is_cross_signing_set_up_for_user(user_id): + # If we already have a master key then cross signing is set up and we require UIA to reset + await self.auth_handler.validate_user_via_ui_auth( + requester, + request, + body, + "reset the device signing key on your account", + # Do not allow skipping of UIA auth. + can_skip_ui_auth=False, + ) + # Otherwise we don't require UIA since we are setting up cross signing for first time + else: + # Previous behaviour is to always require UIA but allow it to be skipped + await self.auth_handler.validate_user_via_ui_auth( + requester, + request, + body, + "add a device signing key to your account", + # Allow skipping of UI auth since this is frequently called directly + # after login and it is silly to ask users to re-auth immediately. + can_skip_ui_auth=True, + ) result = await self.e2e_keys_handler.upload_signing_keys_for_user(user_id, body) return 200, result diff --git a/synapse/rest/client/knock.py b/synapse/rest/client/knock.py index ad025c8a45..4fa66904ba 100644 --- a/synapse/rest/client/knock.py +++ b/synapse/rest/client/knock.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, List, Tuple from synapse.api.constants import Membership from synapse.api.errors import SynapseError @@ -24,8 +24,6 @@ from synapse.http.servlet import ( parse_strings_from_args, ) from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import set_tag -from synapse.rest.client.transactions import HttpTransactionCache from synapse.types import JsonDict, RoomAlias, RoomID if TYPE_CHECKING: @@ -45,7 +43,6 @@ class KnockRoomAliasServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() - self.txns = HttpTransactionCache(hs) self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() @@ -53,7 +50,6 @@ class KnockRoomAliasServlet(RestServlet): self, request: SynapseRequest, room_identifier: str, - txn_id: Optional[str] = None, ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) @@ -67,7 +63,6 @@ class KnockRoomAliasServlet(RestServlet): # twisted.web.server.Request.args is incorrectly defined as Optional[Any] args: Dict[bytes, List[bytes]] = request.args # type: ignore - remote_room_hosts = parse_strings_from_args( args, "server_name", required=False ) @@ -86,7 +81,6 @@ class KnockRoomAliasServlet(RestServlet): target=requester.user, room_id=room_id, action=Membership.KNOCK, - txn_id=txn_id, third_party_signed=None, remote_room_hosts=remote_room_hosts, content=event_content, @@ -94,15 +88,6 @@ class KnockRoomAliasServlet(RestServlet): return 200, {"room_id": room_id} - def on_PUT( - self, request: SynapseRequest, room_identifier: str, txn_id: str - ) -> Awaitable[Tuple[int, JsonDict]]: - set_tag("txn_id", txn_id) - - return self.txns.fetch_or_execute_request( - request, self.on_POST, request, room_identifier, txn_id - ) - def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: KnockRoomAliasServlet(hs).register(http_server) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 14b04810a1..45aee3d3fe 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -926,7 +926,7 @@ class RoomMembershipRestServlet(TransactionRestServlet): self.auth = hs.get_auth() def register(self, http_server: HttpServer) -> None: - # /rooms/$roomid/[invite|join|leave] + # /rooms/$roomid/[join|invite|leave|ban|unban|kick] PATTERNS = ( "/rooms/(?P<room_id>[^/]*)/" "(?P<membership_action>join|invite|leave|ban|unban|kick)" diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index f2013faeb2..8fcb8ac3d9 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -16,7 +16,7 @@ import logging from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from synapse.api.constants import EduTypes, Membership, PresenceState +from synapse.api.constants import AccountDataTypes, EduTypes, Membership, PresenceState from synapse.api.errors import Codes, StoreError, SynapseError from synapse.api.filtering import FilterCollection from synapse.api.presence import UserPresenceState @@ -139,7 +139,28 @@ class SyncRestServlet(RestServlet): device_id, ) - request_key = (user, timeout, since, filter_id, full_state, device_id) + # Stream position of the last ignored users account data event for this user, + # if we're initial syncing. + # We include this in the request key to invalidate an initial sync + # in the response cache once the set of ignored users has changed. + # (We filter out ignored users from timeline events, so our sync response + # is invalid once the set of ignored users changes.) + last_ignore_accdata_streampos: Optional[int] = None + if not since: + # No `since`, so this is an initial sync. + last_ignore_accdata_streampos = await self.store.get_latest_stream_id_for_global_account_data_by_type_for_user( + user.to_string(), AccountDataTypes.IGNORED_USER_LIST + ) + + request_key = ( + user, + timeout, + since, + filter_id, + full_state, + device_id, + last_ignore_accdata_streampos, + ) if filter_id is None: filter_collection = self.filtering.DEFAULT_FILTER_COLLECTION diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/config_resource.py index a95804d327..a95804d327 100644 --- a/synapse/rest/media/v1/config_resource.py +++ b/synapse/rest/media/config_resource.py diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/download_resource.py index 048a042692..8f270cf4cc 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/download_resource.py @@ -22,11 +22,10 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_boolean from synapse.http.site import SynapseRequest - -from ._base import parse_media_id, respond_404 +from synapse.media._base import parse_media_id, respond_404 if TYPE_CHECKING: - from synapse.rest.media.v1.media_repository import MediaRepository + from synapse.media.media_repository import MediaRepository from synapse.server import HomeServer logger = logging.getLogger(__name__) diff --git a/synapse/rest/media/media_repository_resource.py b/synapse/rest/media/media_repository_resource.py new file mode 100644 index 0000000000..5ebaa3b032 --- /dev/null +++ b/synapse/rest/media/media_repository_resource.py @@ -0,0 +1,93 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import TYPE_CHECKING + +from synapse.config._base import ConfigError +from synapse.http.server import UnrecognizedRequestResource + +from .config_resource import MediaConfigResource +from .download_resource import DownloadResource +from .preview_url_resource import PreviewUrlResource +from .thumbnail_resource import ThumbnailResource +from .upload_resource import UploadResource + +if TYPE_CHECKING: + from synapse.server import HomeServer + + +class MediaRepositoryResource(UnrecognizedRequestResource): + """File uploading and downloading. + + Uploads are POSTed to a resource which returns a token which is used to GET + the download:: + + => POST /_matrix/media/r0/upload HTTP/1.1 + Content-Type: <media-type> + Content-Length: <content-length> + + <media> + + <= HTTP/1.1 200 OK + Content-Type: application/json + + { "content_uri": "mxc://<server-name>/<media-id>" } + + => GET /_matrix/media/r0/download/<server-name>/<media-id> HTTP/1.1 + + <= HTTP/1.1 200 OK + Content-Type: <media-type> + Content-Disposition: attachment;filename=<upload-filename> + + <media> + + Clients can get thumbnails by supplying a desired width and height and + thumbnailing method:: + + => GET /_matrix/media/r0/thumbnail/<server_name> + /<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1 + + <= HTTP/1.1 200 OK + Content-Type: image/jpeg or image/png + + <thumbnail> + + The thumbnail methods are "crop" and "scale". "scale" tries to return an + image where either the width or the height is smaller than the requested + size. The client should then scale and letterbox the image if it needs to + fit within a given rectangle. "crop" tries to return an image where the + width and height are close to the requested size and the aspect matches + the requested size. The client should scale the image if it needs to fit + within a given rectangle. + """ + + def __init__(self, hs: "HomeServer"): + # If we're not configured to use it, raise if we somehow got here. + if not hs.config.media.can_load_media_repo: + raise ConfigError("Synapse is not configured to use a media repo.") + + super().__init__() + media_repo = hs.get_media_repository() + + self.putChild(b"upload", UploadResource(hs, media_repo)) + self.putChild(b"download", DownloadResource(hs, media_repo)) + self.putChild( + b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage) + ) + if hs.config.media.url_preview_enabled: + self.putChild( + b"preview_url", + PreviewUrlResource(hs, media_repo, media_repo.media_storage), + ) + self.putChild(b"config", MediaConfigResource(hs)) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/preview_url_resource.py index 4a594ab9d8..7ada728757 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/preview_url_resource.py @@ -40,21 +40,19 @@ from synapse.http.server import ( from synapse.http.servlet import parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.media._base import FileInfo, get_filename_from_headers +from synapse.media.media_storage import MediaStorage +from synapse.media.oembed import OEmbedProvider +from synapse.media.preview_html import decode_body, parse_html_to_open_graph from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.rest.media.v1._base import get_filename_from_headers -from synapse.rest.media.v1.media_storage import MediaStorage -from synapse.rest.media.v1.oembed import OEmbedProvider -from synapse.rest.media.v1.preview_html import decode_body, parse_html_to_open_graph from synapse.types import JsonDict, UserID from synapse.util import json_encoder from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.stringutils import random_string -from ._base import FileInfo - if TYPE_CHECKING: - from synapse.rest.media.v1.media_repository import MediaRepository + from synapse.media.media_repository import MediaRepository from synapse.server import HomeServer logger = logging.getLogger(__name__) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py index 3e720018b3..4ee2a0dbda 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/thumbnail_resource.py @@ -27,9 +27,7 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_integer, parse_string from synapse.http.site import SynapseRequest -from synapse.rest.media.v1.media_storage import MediaStorage - -from ._base import ( +from synapse.media._base import ( FileInfo, ThumbnailInfo, parse_media_id, @@ -37,9 +35,10 @@ from ._base import ( respond_with_file, respond_with_responder, ) +from synapse.media.media_storage import MediaStorage if TYPE_CHECKING: - from synapse.rest.media.v1.media_repository import MediaRepository + from synapse.media.media_repository import MediaRepository from synapse.server import HomeServer logger = logging.getLogger(__name__) diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/upload_resource.py index 97548b54e5..697348613b 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/upload_resource.py @@ -20,10 +20,10 @@ from synapse.api.errors import Codes, SynapseError from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.servlet import parse_bytes_from_args from synapse.http.site import SynapseRequest -from synapse.rest.media.v1.media_storage import SpamMediaException +from synapse.media.media_storage import SpamMediaException if TYPE_CHECKING: - from synapse.rest.media.v1.media_repository import MediaRepository + from synapse.media.media_repository import MediaRepository from synapse.server import HomeServer logger = logging.getLogger(__name__) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index ef8334ae25..88427a5737 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -1,5 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2019-2021 The Matrix.org Foundation C.I.C. +# Copyright 2023 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,468 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -import logging -import os -import urllib -from abc import ABC, abstractmethod -from types import TracebackType -from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type - -import attr - -from twisted.internet.interfaces import IConsumer -from twisted.protocols.basic import FileSender -from twisted.web.server import Request - -from synapse.api.errors import Codes, SynapseError, cs_error -from synapse.http.server import finish_request, respond_with_json -from synapse.http.site import SynapseRequest -from synapse.logging.context import make_deferred_yieldable -from synapse.util.stringutils import is_ascii, parse_and_validate_server_name - -logger = logging.getLogger(__name__) - -# list all text content types that will have the charset default to UTF-8 when -# none is given -TEXT_CONTENT_TYPES = [ - "text/css", - "text/csv", - "text/html", - "text/calendar", - "text/plain", - "text/javascript", - "application/json", - "application/ld+json", - "application/rtf", - "image/svg+xml", - "text/xml", -] - - -def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: - """Parses the server name, media ID and optional file name from the request URI - - Also performs some rough validation on the server name. - - Args: - request: The `Request`. - - Returns: - A tuple containing the parsed server name, media ID and optional file name. - - Raises: - SynapseError(404): if parsing or validation fail for any reason - """ - try: - # The type on postpath seems incorrect in Twisted 21.2.0. - postpath: List[bytes] = request.postpath # type: ignore - assert postpath - - # This allows users to append e.g. /test.png to the URL. Useful for - # clients that parse the URL to see content type. - server_name_bytes, media_id_bytes = postpath[:2] - server_name = server_name_bytes.decode("utf-8") - media_id = media_id_bytes.decode("utf8") - - # Validate the server name, raising if invalid - parse_and_validate_server_name(server_name) - - file_name = None - if len(postpath) > 2: - try: - file_name = urllib.parse.unquote(postpath[-1].decode("utf-8")) - except UnicodeDecodeError: - pass - return server_name, media_id, file_name - except Exception: - raise SynapseError( - 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN - ) - - -def respond_404(request: SynapseRequest) -> None: - respond_with_json( - request, - 404, - cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), - send_cors=True, - ) - - -async def respond_with_file( - request: SynapseRequest, - media_type: str, - file_path: str, - file_size: Optional[int] = None, - upload_name: Optional[str] = None, -) -> None: - logger.debug("Responding with %r", file_path) - - if os.path.isfile(file_path): - if file_size is None: - stat = os.stat(file_path) - file_size = stat.st_size - - add_file_headers(request, media_type, file_size, upload_name) - - with open(file_path, "rb") as f: - await make_deferred_yieldable(FileSender().beginFileTransfer(f, request)) - - finish_request(request) - else: - respond_404(request) - - -def add_file_headers( - request: Request, - media_type: str, - file_size: Optional[int], - upload_name: Optional[str], -) -> None: - """Adds the correct response headers in preparation for responding with the - media. - - Args: - request - media_type: The media/content type. - file_size: Size in bytes of the media, if known. - upload_name: The name of the requested file, if any. - """ - - def _quote(x: str) -> str: - return urllib.parse.quote(x.encode("utf-8")) - - # Default to a UTF-8 charset for text content types. - # ex, uses UTF-8 for 'text/css' but not 'text/css; charset=UTF-16' - if media_type.lower() in TEXT_CONTENT_TYPES: - content_type = media_type + "; charset=UTF-8" - else: - content_type = media_type - - request.setHeader(b"Content-Type", content_type.encode("UTF-8")) - if upload_name: - # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. - # - # `filename` is defined to be a `value`, which is defined by RFC2616 - # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` - # is (essentially) a single US-ASCII word, and a `quoted-string` is a - # US-ASCII string surrounded by double-quotes, using backslash as an - # escape character. Note that %-encoding is *not* permitted. - # - # `filename*` is defined to be an `ext-value`, which is defined in - # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, - # where `value-chars` is essentially a %-encoded string in the given charset. - # - # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 - # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 - # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 - - # We avoid the quoted-string version of `filename`, because (a) synapse didn't - # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we - # may as well just do the filename* version. - if _can_encode_filename_as_token(upload_name): - disposition = "inline; filename=%s" % (upload_name,) - else: - disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) - - request.setHeader(b"Content-Disposition", disposition.encode("ascii")) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") - if file_size is not None: - request.setHeader(b"Content-Length", b"%d" % (file_size,)) - - # Tell web crawlers to not index, archive, or follow links in media. This - # should help to prevent things in the media repo from showing up in web - # search results. - request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex") - - -# separators as defined in RFC2616. SP and HT are handled separately. -# see _can_encode_filename_as_token. -_FILENAME_SEPARATOR_CHARS = { - "(", - ")", - "<", - ">", - "@", - ",", - ";", - ":", - "\\", - '"', - "/", - "[", - "]", - "?", - "=", - "{", - "}", -} - - -def _can_encode_filename_as_token(x: str) -> bool: - for c in x: - # from RFC2616: - # - # token = 1*<any CHAR except CTLs or separators> - # - # separators = "(" | ")" | "<" | ">" | "@" - # | "," | ";" | ":" | "\" | <"> - # | "/" | "[" | "]" | "?" | "=" - # | "{" | "}" | SP | HT - # - # CHAR = <any US-ASCII character (octets 0 - 127)> - # - # CTL = <any US-ASCII control character - # (octets 0 - 31) and DEL (127)> - # - if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: - return False - return True - - -async def respond_with_responder( - request: SynapseRequest, - responder: "Optional[Responder]", - media_type: str, - file_size: Optional[int], - upload_name: Optional[str] = None, -) -> None: - """Responds to the request with given responder. If responder is None then - returns 404. - - Args: - request - responder - media_type: The media/content type. - file_size: Size in bytes of the media. If not known it should be None - upload_name: The name of the requested file, if any. - """ - if not responder: - respond_404(request) - return - - # If we have a responder we *must* use it as a context manager. - with responder: - if request._disconnected: - logger.warning( - "Not sending response to request %s, already disconnected.", request - ) - return - - logger.debug("Responding to media request with responder %s", responder) - add_file_headers(request, media_type, file_size, upload_name) - try: - await responder.write_to_consumer(request) - except Exception as e: - # The majority of the time this will be due to the client having gone - # away. Unfortunately, Twisted simply throws a generic exception at us - # in that case. - logger.warning("Failed to write to consumer: %s %s", type(e), e) - - # Unregister the producer, if it has one, so Twisted doesn't complain - if request.producer: - request.unregisterProducer() - - finish_request(request) - - -class Responder(ABC): - """Represents a response that can be streamed to the requester. - - Responder is a context manager which *must* be used, so that any resources - held can be cleaned up. - """ - - @abstractmethod - def write_to_consumer(self, consumer: IConsumer) -> Awaitable: - """Stream response into consumer - - Args: - consumer: The consumer to stream into. - - Returns: - Resolves once the response has finished being written - """ - raise NotImplementedError() - - def __enter__(self) -> None: # noqa: B027 - pass - - def __exit__( # noqa: B027 - self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> None: - pass - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class ThumbnailInfo: - """Details about a generated thumbnail.""" - - width: int - height: int - method: str - # Content type of thumbnail, e.g. image/png - type: str - # The size of the media file, in bytes. - length: Optional[int] = None - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class FileInfo: - """Details about a requested/uploaded file.""" - - # The server name where the media originated from, or None if local. - server_name: Optional[str] - # The local ID of the file. For local files this is the same as the media_id - file_id: str - # If the file is for the url preview cache - url_cache: bool = False - # Whether the file is a thumbnail or not. - thumbnail: Optional[ThumbnailInfo] = None - - # The below properties exist to maintain compatibility with third-party modules. - @property - def thumbnail_width(self) -> Optional[int]: - if not self.thumbnail: - return None - return self.thumbnail.width - - @property - def thumbnail_height(self) -> Optional[int]: - if not self.thumbnail: - return None - return self.thumbnail.height - - @property - def thumbnail_method(self) -> Optional[str]: - if not self.thumbnail: - return None - return self.thumbnail.method - - @property - def thumbnail_type(self) -> Optional[str]: - if not self.thumbnail: - return None - return self.thumbnail.type - - @property - def thumbnail_length(self) -> Optional[int]: - if not self.thumbnail: - return None - return self.thumbnail.length - - -def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]: - """ - Get the filename of the downloaded file by inspecting the - Content-Disposition HTTP header. - - Args: - headers: The HTTP request headers. - - Returns: - The filename, or None. - """ - content_disposition = headers.get(b"Content-Disposition", [b""]) - - # No header, bail out. - if not content_disposition[0]: - return None - - _, params = _parse_header(content_disposition[0]) - - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get(b"filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith(b"utf-8''"): - upload_name_utf8 = upload_name_utf8[7:] - # We have a filename*= section. This MUST be ASCII, and any UTF-8 - # bytes are %-quoted. - try: - # Once it is decoded, we can then unquote the %-encoded - # parts strictly into a unicode string. - upload_name = urllib.parse.unquote( - upload_name_utf8.decode("ascii"), errors="strict" - ) - except UnicodeDecodeError: - # Incorrect UTF-8. - pass - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get(b"filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii.decode("ascii") - - # This may be None here, indicating we did not find a matching name. - return upload_name - - -def _parse_header(line: bytes) -> Tuple[bytes, Dict[bytes, bytes]]: - """Parse a Content-type like header. - - Cargo-culted from `cgi`, but works on bytes rather than strings. - - Args: - line: header to be parsed - - Returns: - The main content-type, followed by the parameter dictionary - """ - parts = _parseparam(b";" + line) - key = next(parts) - pdict = {} - for p in parts: - i = p.find(b"=") - if i >= 0: - name = p[:i].strip().lower() - value = p[i + 1 :].strip() - - # strip double-quotes - if len(value) >= 2 and value[0:1] == value[-1:] == b'"': - value = value[1:-1] - value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"') - pdict[name] = value - - return key, pdict - - -def _parseparam(s: bytes) -> Generator[bytes, None, None]: - """Generator which splits the input on ;, respecting double-quoted sequences - - Cargo-culted from `cgi`, but works on bytes rather than strings. - - Args: - s: header to be parsed - - Returns: - The split input - """ - while s[:1] == b";": - s = s[1:] - - # look for the next ; - end = s.find(b";") - - # if there is an odd number of " marks between here and the next ;, skip to the - # next ; instead - while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: - end = s.find(b";", end + 1) - - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] +# This exists purely for backwards compatibility with media providers and spam checkers. +from synapse.media._base import FileInfo, Responder # noqa: F401 diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index db25848744..11b0e8e231 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -1,4 +1,4 @@ -# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# Copyright 2023 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,364 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import contextlib -import logging -import os -import shutil -from types import TracebackType -from typing import ( - IO, - TYPE_CHECKING, - Any, - Awaitable, - BinaryIO, - Callable, - Generator, - Optional, - Sequence, - Tuple, - Type, -) - -import attr - -from twisted.internet.defer import Deferred -from twisted.internet.interfaces import IConsumer -from twisted.protocols.basic import FileSender - -import synapse -from synapse.api.errors import NotFoundError -from synapse.logging.context import defer_to_thread, make_deferred_yieldable -from synapse.util import Clock -from synapse.util.file_consumer import BackgroundFileConsumer - -from ._base import FileInfo, Responder -from .filepath import MediaFilePaths - -if TYPE_CHECKING: - from synapse.rest.media.v1.storage_provider import StorageProvider - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -class MediaStorage: - """Responsible for storing/fetching files from local sources. - - Args: - hs - local_media_directory: Base path where we store media on disk - filepaths - storage_providers: List of StorageProvider that are used to fetch and store files. - """ - - def __init__( - self, - hs: "HomeServer", - local_media_directory: str, - filepaths: MediaFilePaths, - storage_providers: Sequence["StorageProvider"], - ): - self.hs = hs - self.reactor = hs.get_reactor() - self.local_media_directory = local_media_directory - self.filepaths = filepaths - self.storage_providers = storage_providers - self.spam_checker = hs.get_spam_checker() - self.clock = hs.get_clock() - - async def store_file(self, source: IO, file_info: FileInfo) -> str: - """Write `source` to the on disk media store, and also any other - configured storage providers - - Args: - source: A file like object that should be written - file_info: Info about the file to store - - Returns: - the file path written to in the primary media store - """ - - with self.store_into_file(file_info) as (f, fname, finish_cb): - # Write to the main repository - await self.write_to_file(source, f) - await finish_cb() - - return fname - - async def write_to_file(self, source: IO, output: IO) -> None: - """Asynchronously write the `source` to `output`.""" - await defer_to_thread(self.reactor, _write_file_synchronously, source, output) - - @contextlib.contextmanager - def store_into_file( - self, file_info: FileInfo - ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]: - """Context manager used to get a file like object to write into, as - described by file_info. - - Actually yields a 3-tuple (file, fname, finish_cb), where file is a file - like object that can be written to, fname is the absolute path of file - on disk, and finish_cb is a function that returns an awaitable. - - fname can be used to read the contents from after upload, e.g. to - generate thumbnails. - - finish_cb must be called and waited on after the file has been - successfully been written to. Should not be called if there was an - error. - - Args: - file_info: Info about the file to store - - Example: - - with media_storage.store_into_file(info) as (f, fname, finish_cb): - # .. write into f ... - await finish_cb() - """ - - path = self._file_info_to_path(file_info) - fname = os.path.join(self.local_media_directory, path) - - dirname = os.path.dirname(fname) - os.makedirs(dirname, exist_ok=True) - - finished_called = [False] - - try: - with open(fname, "wb") as f: - - async def finish() -> None: - # Ensure that all writes have been flushed and close the - # file. - f.flush() - f.close() - - spam_check = await self.spam_checker.check_media_file_for_spam( - ReadableFileWrapper(self.clock, fname), file_info - ) - if spam_check != synapse.module_api.NOT_SPAM: - logger.info("Blocking media due to spam checker") - # Note that we'll delete the stored media, due to the - # try/except below. The media also won't be stored in - # the DB. - # We currently ignore any additional field returned by - # the spam-check API. - raise SpamMediaException(errcode=spam_check[0]) - - for provider in self.storage_providers: - await provider.store_file(path, file_info) - - finished_called[0] = True - - yield f, fname, finish - except Exception as e: - try: - os.remove(fname) - except Exception: - pass - - raise e from None - - if not finished_called: - raise Exception("Finished callback not called") - - async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: - """Attempts to fetch media described by file_info from the local cache - and configured storage providers. - - Args: - file_info - - Returns: - Returns a Responder if the file was found, otherwise None. - """ - paths = [self._file_info_to_path(file_info)] - - # fallback for remote thumbnails with no method in the filename - if file_info.thumbnail and file_info.server_name: - paths.append( - self.filepaths.remote_media_thumbnail_rel_legacy( - server_name=file_info.server_name, - file_id=file_info.file_id, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - ) - ) - - for path in paths: - local_path = os.path.join(self.local_media_directory, path) - if os.path.exists(local_path): - logger.debug("responding with local file %s", local_path) - return FileResponder(open(local_path, "rb")) - logger.debug("local file %s did not exist", local_path) - - for provider in self.storage_providers: - for path in paths: - res: Any = await provider.fetch(path, file_info) - if res: - logger.debug("Streaming %s from %s", path, provider) - return res - logger.debug("%s not found on %s", path, provider) - - return None - - async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: - """Ensures that the given file is in the local cache. Attempts to - download it from storage providers if it isn't. - - Args: - file_info - - Returns: - Full path to local file - """ - path = self._file_info_to_path(file_info) - local_path = os.path.join(self.local_media_directory, path) - if os.path.exists(local_path): - return local_path - - # Fallback for paths without method names - # Should be removed in the future - if file_info.thumbnail and file_info.server_name: - legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy( - server_name=file_info.server_name, - file_id=file_info.file_id, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - ) - legacy_local_path = os.path.join(self.local_media_directory, legacy_path) - if os.path.exists(legacy_local_path): - return legacy_local_path - - dirname = os.path.dirname(local_path) - os.makedirs(dirname, exist_ok=True) - - for provider in self.storage_providers: - res: Any = await provider.fetch(path, file_info) - if res: - with res: - consumer = BackgroundFileConsumer( - open(local_path, "wb"), self.reactor - ) - await res.write_to_consumer(consumer) - await consumer.wait() - return local_path - - raise NotFoundError() - - def _file_info_to_path(self, file_info: FileInfo) -> str: - """Converts file_info into a relative path. - - The path is suitable for storing files under a directory, e.g. used to - store files on local FS under the base media repository directory. - """ - if file_info.url_cache: - if file_info.thumbnail: - return self.filepaths.url_cache_thumbnail_rel( - media_id=file_info.file_id, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - method=file_info.thumbnail.method, - ) - return self.filepaths.url_cache_filepath_rel(file_info.file_id) - - if file_info.server_name: - if file_info.thumbnail: - return self.filepaths.remote_media_thumbnail_rel( - server_name=file_info.server_name, - file_id=file_info.file_id, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - method=file_info.thumbnail.method, - ) - return self.filepaths.remote_media_filepath_rel( - file_info.server_name, file_info.file_id - ) - - if file_info.thumbnail: - return self.filepaths.local_media_thumbnail_rel( - media_id=file_info.file_id, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - method=file_info.thumbnail.method, - ) - return self.filepaths.local_media_filepath_rel(file_info.file_id) - - -def _write_file_synchronously(source: IO, dest: IO) -> None: - """Write `source` to the file like `dest` synchronously. Should be called - from a thread. - - Args: - source: A file like object that's to be written - dest: A file like object to be written to - """ - source.seek(0) # Ensure we read from the start of the file - shutil.copyfileobj(source, dest) - - -class FileResponder(Responder): - """Wraps an open file that can be sent to a request. - - Args: - open_file: A file like object to be streamed ot the client, - is closed when finished streaming. - """ - - def __init__(self, open_file: IO): - self.open_file = open_file - - def write_to_consumer(self, consumer: IConsumer) -> Deferred: - return make_deferred_yieldable( - FileSender().beginFileTransfer(self.open_file, consumer) - ) - - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> None: - self.open_file.close() - - -class SpamMediaException(NotFoundError): - """The media was blocked by a spam checker, so we simply 404 the request (in - the same way as if it was quarantined). - """ - - -@attr.s(slots=True, auto_attribs=True) -class ReadableFileWrapper: - """Wrapper that allows reading a file in chunks, yielding to the reactor, - and writing to a callback. - - This is simplified `FileSender` that takes an IO object rather than an - `IConsumer`. - """ - - CHUNK_SIZE = 2**14 - - clock: Clock - path: str - - async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None: - """Reads the file in chunks and calls the callback with each chunk.""" - - with open(self.path, "rb") as file: - while True: - chunk = file.read(self.CHUNK_SIZE) - if not chunk: - break - - callback(chunk) +# - # We yield to the reactor by sleeping for 0 seconds. - await self.clock.sleep(0) +# This exists purely for backwards compatibility with spam checkers. +from synapse.media.media_storage import ReadableFileWrapper # noqa: F401 diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 1c9b71d69c..d7653f30ae 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -1,4 +1,4 @@ -# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# Copyright 2023 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,171 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -import abc -import logging -import os -import shutil -from typing import TYPE_CHECKING, Callable, Optional - -from synapse.config._base import Config -from synapse.logging.context import defer_to_thread, run_in_background -from synapse.util.async_helpers import maybe_awaitable - -from ._base import FileInfo, Responder -from .media_storage import FileResponder - -logger = logging.getLogger(__name__) - -if TYPE_CHECKING: - from synapse.server import HomeServer - - -class StorageProvider(metaclass=abc.ABCMeta): - """A storage provider is a service that can store uploaded media and - retrieve them. - """ - - @abc.abstractmethod - async def store_file(self, path: str, file_info: FileInfo) -> None: - """Store the file described by file_info. The actual contents can be - retrieved by reading the file in file_info.upload_path. - - Args: - path: Relative path of file in local cache - file_info: The metadata of the file. - """ - - @abc.abstractmethod - async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: - """Attempt to fetch the file described by file_info and stream it - into writer. - - Args: - path: Relative path of file in local cache - file_info: The metadata of the file. - - Returns: - Returns a Responder if the provider has the file, otherwise returns None. - """ - - -class StorageProviderWrapper(StorageProvider): - """Wraps a storage provider and provides various config options - - Args: - backend: The storage provider to wrap. - store_local: Whether to store new local files or not. - store_synchronous: Whether to wait for file to be successfully - uploaded, or todo the upload in the background. - store_remote: Whether remote media should be uploaded - """ - - def __init__( - self, - backend: StorageProvider, - store_local: bool, - store_synchronous: bool, - store_remote: bool, - ): - self.backend = backend - self.store_local = store_local - self.store_synchronous = store_synchronous - self.store_remote = store_remote - - def __str__(self) -> str: - return "StorageProviderWrapper[%s]" % (self.backend,) - - async def store_file(self, path: str, file_info: FileInfo) -> None: - if not file_info.server_name and not self.store_local: - return None - - if file_info.server_name and not self.store_remote: - return None - - if file_info.url_cache: - # The URL preview cache is short lived and not worth offloading or - # backing up. - return None - - if self.store_synchronous: - # store_file is supposed to return an Awaitable, but guard - # against improper implementations. - await maybe_awaitable(self.backend.store_file(path, file_info)) # type: ignore - else: - # TODO: Handle errors. - async def store() -> None: - try: - return await maybe_awaitable( - self.backend.store_file(path, file_info) - ) - except Exception: - logger.exception("Error storing file") - - run_in_background(store) - - async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: - if file_info.url_cache: - # Files in the URL preview cache definitely aren't stored here, - # so avoid any potentially slow I/O or network access. - return None - - # store_file is supposed to return an Awaitable, but guard - # against improper implementations. - return await maybe_awaitable(self.backend.fetch(path, file_info)) - - -class FileStorageProviderBackend(StorageProvider): - """A storage provider that stores files in a directory on a filesystem. - - Args: - hs - config: The config returned by `parse_config`. - """ - - def __init__(self, hs: "HomeServer", config: str): - self.hs = hs - self.cache_directory = hs.config.media.media_store_path - self.base_directory = config - - def __str__(self) -> str: - return "FileStorageProviderBackend[%s]" % (self.base_directory,) - - async def store_file(self, path: str, file_info: FileInfo) -> None: - """See StorageProvider.store_file""" - - primary_fname = os.path.join(self.cache_directory, path) - backup_fname = os.path.join(self.base_directory, path) - - dirname = os.path.dirname(backup_fname) - os.makedirs(dirname, exist_ok=True) - - # mypy needs help inferring the type of the second parameter, which is generic - shutil_copyfile: Callable[[str, str], str] = shutil.copyfile - await defer_to_thread( - self.hs.get_reactor(), - shutil_copyfile, - primary_fname, - backup_fname, - ) - - async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: - """See StorageProvider.fetch""" - - backup_fname = os.path.join(self.base_directory, path) - if os.path.isfile(backup_fname): - return FileResponder(open(backup_fname, "rb")) - - return None - - @staticmethod - def parse_config(config: dict) -> str: - """Called on startup to parse config supplied. This should parse - the config and raise if there is a problem. - - The returned value is passed into the constructor. - - In this case we only care about a single param, the directory, so let's - just pull that out. - """ - return Config.ensure_directory(config["directory"]) +# This exists purely for backwards compatibility with media providers. +from synapse.media.storage_provider import StorageProvider # noqa: F401 diff --git a/synapse/server.py b/synapse/server.py index e5a3475247..a7c32e9a60 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -105,6 +105,7 @@ from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler from synapse.handlers.user_directory import UserDirectoryHandler from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.media.media_repository import MediaRepository from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager from synapse.module_api import ModuleApi from synapse.notifier import Notifier, ReplicationNotifier @@ -115,10 +116,7 @@ from synapse.replication.tcp.external_cache import ExternalCache from synapse.replication.tcp.handler import ReplicationCommandHandler from synapse.replication.tcp.resource import ReplicationStreamer from synapse.replication.tcp.streams import STREAMS_MAP, Stream -from synapse.rest.media.v1.media_repository import ( - MediaRepository, - MediaRepositoryResource, -) +from synapse.rest.media.media_repository_resource import MediaRepositoryResource from synapse.server_notices.server_notices_manager import ServerNoticesManager from synapse.server_notices.server_notices_sender import ServerNoticesSender from synapse.server_notices.worker_server_notices_sender import ( diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 837dc7646e..dc3948c170 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -43,7 +43,7 @@ from .event_federation import EventFederationStore from .event_push_actions import EventPushActionsStore from .events_bg_updates import EventsBackgroundUpdatesStore from .events_forward_extremities import EventForwardExtremitiesStore -from .filtering import FilteringStore +from .filtering import FilteringWorkerStore from .keys import KeyStore from .lock import LockStore from .media_repository import MediaRepositoryStore @@ -99,7 +99,7 @@ class DataStore( EventFederationStore, MediaRepositoryStore, RejectionsStore, - FilteringStore, + FilteringWorkerStore, PusherStore, PushRuleStore, ApplicationServiceTransactionStore, diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 95567826f2..a9843f6e17 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -40,7 +40,6 @@ from synapse.storage.databases.main.push_rule import PushRulesWorkerStore from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import ( AbstractStreamIdGenerator, - AbstractStreamIdTracker, MultiWriterIdGenerator, StreamIdGenerator, ) @@ -64,14 +63,12 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ): super().__init__(database, db_conn, hs) - # `_can_write_to_account_data` indicates whether the current worker is allowed - # to write account data. A value of `True` implies that `_account_data_id_gen` - # is an `AbstractStreamIdGenerator` and not just a tracker. - self._account_data_id_gen: AbstractStreamIdTracker self._can_write_to_account_data = ( self._instance_name in hs.config.worker.writers.account_data ) + self._account_data_id_gen: AbstractStreamIdGenerator + if isinstance(database.engine, PostgresEngine): self._account_data_id_gen = MultiWriterIdGenerator( db_conn=db_conn, @@ -237,6 +234,37 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) else: return None + async def get_latest_stream_id_for_global_account_data_by_type_for_user( + self, user_id: str, data_type: str + ) -> Optional[int]: + """ + Returns: + The stream ID of the account data, + or None if there is no such account data. + """ + + def get_latest_stream_id_for_global_account_data_by_type_for_user_txn( + txn: LoggingTransaction, + ) -> Optional[int]: + sql = """ + SELECT stream_id FROM account_data + WHERE user_id = ? AND account_data_type = ? + ORDER BY stream_id DESC + LIMIT 1 + """ + txn.execute(sql, (user_id, data_type)) + + row = txn.fetchone() + if row: + return row[0] + else: + return None + + return await self.db_pool.runInteraction( + "get_latest_stream_id_for_global_account_data_by_type_for_user", + get_latest_stream_id_for_global_account_data_by_type_for_user_txn, + ) + @cached(num_args=2, tree=True) async def get_account_data_for_room( self, user_id: str, room_id: str @@ -527,7 +555,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) The maximum stream ID. """ assert self._can_write_to_account_data - assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator) content_json = json_encoder.encode(content) @@ -554,7 +581,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) async def remove_account_data_for_room( self, user_id: str, room_id: str, account_data_type: str - ) -> Optional[int]: + ) -> int: """Delete the room account data for the user of a given type. Args: @@ -567,7 +594,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) data to delete. """ assert self._can_write_to_account_data - assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator) def _remove_account_data_for_room_txn( txn: LoggingTransaction, next_id: int @@ -606,15 +632,13 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) next_id, ) - if not row_updated: - return None - - self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_room_account_data_for_user.invalidate((user_id,)) - self.get_account_data_for_room.invalidate((user_id, room_id)) - self.get_account_data_for_room_and_type.prefill( - (user_id, room_id, account_data_type), {} - ) + if row_updated: + self._account_data_stream_cache.entity_has_changed(user_id, next_id) + self.get_room_account_data_for_user.invalidate((user_id,)) + self.get_account_data_for_room.invalidate((user_id, room_id)) + self.get_account_data_for_room_and_type.prefill( + (user_id, room_id, account_data_type), {} + ) return self._account_data_id_gen.get_current_token() @@ -632,7 +656,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) The maximum stream ID. """ assert self._can_write_to_account_data - assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator) async with self._account_data_id_gen.get_next() as next_id: await self.db_pool.runInteraction( @@ -722,7 +745,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) self, user_id: str, account_data_type: str, - ) -> Optional[int]: + ) -> int: """ Delete a single piece of user account data by type. @@ -739,7 +762,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) to delete. """ assert self._can_write_to_account_data - assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator) def _remove_account_data_for_user_txn( txn: LoggingTransaction, next_id: int @@ -809,14 +831,12 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) next_id, ) - if not row_updated: - return None - - self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_global_account_data_for_user.invalidate((user_id,)) - self.get_global_account_data_by_type_for_user.prefill( - (user_id, account_data_type), {} - ) + if row_updated: + self._account_data_stream_cache.entity_has_changed(user_id, next_id) + self.get_global_account_data_for_user.invalidate((user_id,)) + self.get_global_account_data_by_type_for_user.prefill( + (user_id, account_data_type), {} + ) return self._account_data_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 5b66431691..096dec7f87 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -266,9 +266,6 @@ class CacheInvalidationWorkerStore(SQLBaseStore): if relates_to: self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,)) self._attempt_to_invalidate_cache("get_references_for_event", (relates_to,)) - self._attempt_to_invalidate_cache( - "get_aggregation_groups_for_event", (relates_to,) - ) self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,)) self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,)) self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 73b8aea16c..a8a4ed4436 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2024,10 +2024,6 @@ class PersistEventsStore: self.store._invalidate_cache_and_stream( txn, self.store.get_relations_for_event, (redacted_relates_to,) ) - if rel_type == RelationTypes.ANNOTATION: - self.store._invalidate_cache_and_stream( - txn, self.store.get_aggregation_groups_for_event, (redacted_relates_to,) - ) if rel_type == RelationTypes.REFERENCE: self.store._invalidate_cache_and_stream( txn, self.store.get_references_for_event, (redacted_relates_to,) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 0a275e6ce6..daef3685b0 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1220,9 +1220,6 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): txn, self.get_relations_for_event, cache_tuple # type: ignore[attr-defined] ) self._invalidate_cache_and_stream( # type: ignore[attr-defined] - txn, self.get_aggregation_groups_for_event, cache_tuple # type: ignore[attr-defined] - ) - self._invalidate_cache_and_stream( # type: ignore[attr-defined] txn, self.get_thread_summary, cache_tuple # type: ignore[attr-defined] ) diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py index 12f3b601f1..8e57c8e5a0 100644 --- a/synapse/storage/databases/main/filtering.py +++ b/synapse/storage/databases/main/filtering.py @@ -17,7 +17,7 @@ from typing import Optional, Tuple, Union, cast from canonicaljson import encode_canonical_json -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, StoreError, SynapseError from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import LoggingTransaction from synapse.types import JsonDict @@ -46,8 +46,6 @@ class FilteringWorkerStore(SQLBaseStore): return db_to_json(def_json) - -class FilteringStore(FilteringWorkerStore): async def add_user_filter(self, user_localpart: str, user_filter: JsonDict) -> int: def_json = encode_canonical_json(user_filter) @@ -79,4 +77,23 @@ class FilteringStore(FilteringWorkerStore): return filter_id - return await self.db_pool.runInteraction("add_user_filter", _do_txn) + attempts = 0 + while True: + # Try a few times. + # This is technically needed if a user tries to create two filters at once, + # leading to two concurrent transactions. + # The failure case would be: + # - SELECT filter_id ... filter_json = ? → both transactions return no rows + # - SELECT MAX(filter_id) ... → both transactions return e.g. 5 + # - INSERT INTO ... → both transactions insert filter_id = 6 + # One of the transactions will commit. The other will get a unique key + # constraint violation error (IntegrityError). This is not the same as a + # serialisability violation, which would be automatically retried by + # `runInteraction`. + try: + return await self.db_pool.runInteraction("add_user_filter", _do_txn) + except self.db_pool.engine.module.IntegrityError: + attempts += 1 + + if attempts >= 5: + raise StoreError(500, "Couldn't generate a filter ID.") diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 9a55e17624..717237e024 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -1002,19 +1002,6 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): desc="user_delete_threepid", ) - async def user_delete_threepids(self, user_id: str) -> None: - """Delete all threepid this user has bound - - Args: - user_id: The user id to delete all threepids of - - """ - await self.db_pool.simple_delete( - "user_threepids", - keyvalues={"user_id": user_id}, - desc="user_delete_threepids", - ) - async def add_user_bound_threepid( self, user_id: str, medium: str, address: str, id_server: str ) -> None: diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index fa3266c081..bc3a83919c 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -398,143 +398,6 @@ class RelationsWorkerStore(SQLBaseStore): return result is not None @cached() - async def get_aggregation_groups_for_event( - self, event_id: str - ) -> Sequence[JsonDict]: - raise NotImplementedError() - - @cachedList( - cached_method_name="get_aggregation_groups_for_event", list_name="event_ids" - ) - async def get_aggregation_groups_for_events( - self, event_ids: Collection[str] - ) -> Mapping[str, Optional[List[JsonDict]]]: - """Get a list of annotations on the given events, grouped by event type and - aggregation key, sorted by count. - - This is used e.g. to get the what and how many reactions have happend - on an event. - - Args: - event_ids: Fetch events that relate to these event IDs. - - Returns: - A map of event IDs to a list of groups of annotations that match. - Each entry is a dict with `type`, `key` and `count` fields. - """ - # The number of entries to return per event ID. - limit = 5 - - clause, args = make_in_list_sql_clause( - self.database_engine, "relates_to_id", event_ids - ) - args.append(RelationTypes.ANNOTATION) - - sql = f""" - SELECT - relates_to_id, - annotation.type, - aggregation_key, - COUNT(DISTINCT annotation.sender) - FROM events AS annotation - INNER JOIN event_relations USING (event_id) - INNER JOIN events AS parent ON - parent.event_id = relates_to_id - AND parent.room_id = annotation.room_id - WHERE - {clause} - AND relation_type = ? - GROUP BY relates_to_id, annotation.type, aggregation_key - ORDER BY relates_to_id, COUNT(*) DESC - """ - - def _get_aggregation_groups_for_events_txn( - txn: LoggingTransaction, - ) -> Mapping[str, List[JsonDict]]: - txn.execute(sql, args) - - result: Dict[str, List[JsonDict]] = {} - for event_id, type, key, count in cast( - List[Tuple[str, str, str, int]], txn - ): - event_results = result.setdefault(event_id, []) - - # Limit the number of results per event ID. - if len(event_results) == limit: - continue - - event_results.append({"type": type, "key": key, "count": count}) - - return result - - return await self.db_pool.runInteraction( - "get_aggregation_groups_for_events", _get_aggregation_groups_for_events_txn - ) - - async def get_aggregation_groups_for_users( - self, event_ids: Collection[str], users: FrozenSet[str] - ) -> Dict[str, Dict[Tuple[str, str], int]]: - """Fetch the partial aggregations for an event for specific users. - - This is used, in conjunction with get_aggregation_groups_for_event, to - remove information from the results for ignored users. - - Args: - event_ids: Fetch events that relate to these event IDs. - users: The users to fetch information for. - - Returns: - A map of event ID to a map of (event type, aggregation key) to a - count of users. - """ - - if not users: - return {} - - events_sql, args = make_in_list_sql_clause( - self.database_engine, "relates_to_id", event_ids - ) - - users_sql, users_args = make_in_list_sql_clause( - self.database_engine, "annotation.sender", users - ) - args.extend(users_args) - args.append(RelationTypes.ANNOTATION) - - sql = f""" - SELECT - relates_to_id, - annotation.type, - aggregation_key, - COUNT(DISTINCT annotation.sender) - FROM events AS annotation - INNER JOIN event_relations USING (event_id) - INNER JOIN events AS parent ON - parent.event_id = relates_to_id - AND parent.room_id = annotation.room_id - WHERE {events_sql} AND {users_sql} AND relation_type = ? - GROUP BY relates_to_id, annotation.type, aggregation_key - ORDER BY relates_to_id, COUNT(*) DESC - """ - - def _get_aggregation_groups_for_users_txn( - txn: LoggingTransaction, - ) -> Dict[str, Dict[Tuple[str, str], int]]: - txn.execute(sql, args) - - result: Dict[str, Dict[Tuple[str, str], int]] = {} - for event_id, type, key, count in cast( - List[Tuple[str, str, str, int]], txn - ): - result.setdefault(event_id, {})[(type, key)] = count - - return result - - return await self.db_pool.runInteraction( - "get_aggregation_groups_for_users", _get_aggregation_groups_for_users_txn - ) - - @cached() async def get_references_for_event(self, event_id: str) -> List[JsonDict]: raise NotImplementedError() diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 39f89291b2..3825bd6079 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1417,6 +1417,204 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): get_un_partial_stated_rooms_from_stream_txn, ) + async def get_event_report(self, report_id: int) -> Optional[Dict[str, Any]]: + """Retrieve an event report + + Args: + report_id: ID of reported event in database + Returns: + JSON dict of information from an event report or None if the + report does not exist. + """ + + def _get_event_report_txn( + txn: LoggingTransaction, report_id: int + ) -> Optional[Dict[str, Any]]: + sql = """ + SELECT + er.id, + er.received_ts, + er.room_id, + er.event_id, + er.user_id, + er.content, + events.sender, + room_stats_state.canonical_alias, + room_stats_state.name, + event_json.json AS event_json + FROM event_reports AS er + LEFT JOIN events + ON events.event_id = er.event_id + JOIN event_json + ON event_json.event_id = er.event_id + JOIN room_stats_state + ON room_stats_state.room_id = er.room_id + WHERE er.id = ? + """ + + txn.execute(sql, [report_id]) + row = txn.fetchone() + + if not row: + return None + + event_report = { + "id": row[0], + "received_ts": row[1], + "room_id": row[2], + "event_id": row[3], + "user_id": row[4], + "score": db_to_json(row[5]).get("score"), + "reason": db_to_json(row[5]).get("reason"), + "sender": row[6], + "canonical_alias": row[7], + "name": row[8], + "event_json": db_to_json(row[9]), + } + + return event_report + + return await self.db_pool.runInteraction( + "get_event_report", _get_event_report_txn, report_id + ) + + async def get_event_reports_paginate( + self, + start: int, + limit: int, + direction: Direction = Direction.BACKWARDS, + user_id: Optional[str] = None, + room_id: Optional[str] = None, + ) -> Tuple[List[Dict[str, Any]], int]: + """Retrieve a paginated list of event reports + + Args: + start: event offset to begin the query from + limit: number of rows to retrieve + direction: Whether to fetch the most recent first (backwards) or the + oldest first (forwards) + user_id: search for user_id. Ignored if user_id is None + room_id: search for room_id. Ignored if room_id is None + Returns: + Tuple of: + json list of event reports + total number of event reports matching the filter criteria + """ + + def _get_event_reports_paginate_txn( + txn: LoggingTransaction, + ) -> Tuple[List[Dict[str, Any]], int]: + filters = [] + args: List[object] = [] + + if user_id: + filters.append("er.user_id LIKE ?") + args.extend(["%" + user_id + "%"]) + if room_id: + filters.append("er.room_id LIKE ?") + args.extend(["%" + room_id + "%"]) + + if direction == Direction.BACKWARDS: + order = "DESC" + else: + order = "ASC" + + where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" + + # We join on room_stats_state despite not using any columns from it + # because the join can influence the number of rows returned; + # e.g. a room that doesn't have state, maybe because it was deleted. + # The query returning the total count should be consistent with + # the query returning the results. + sql = """ + SELECT COUNT(*) as total_event_reports + FROM event_reports AS er + JOIN room_stats_state ON room_stats_state.room_id = er.room_id + {} + """.format( + where_clause + ) + txn.execute(sql, args) + count = cast(Tuple[int], txn.fetchone())[0] + + sql = """ + SELECT + er.id, + er.received_ts, + er.room_id, + er.event_id, + er.user_id, + er.content, + events.sender, + room_stats_state.canonical_alias, + room_stats_state.name + FROM event_reports AS er + LEFT JOIN events + ON events.event_id = er.event_id + JOIN room_stats_state + ON room_stats_state.room_id = er.room_id + {where_clause} + ORDER BY er.received_ts {order} + LIMIT ? + OFFSET ? + """.format( + where_clause=where_clause, + order=order, + ) + + args += [limit, start] + txn.execute(sql, args) + + event_reports = [] + for row in txn: + try: + s = db_to_json(row[5]).get("score") + r = db_to_json(row[5]).get("reason") + except Exception: + logger.error("Unable to parse json from event_reports: %s", row[0]) + continue + event_reports.append( + { + "id": row[0], + "received_ts": row[1], + "room_id": row[2], + "event_id": row[3], + "user_id": row[4], + "score": s, + "reason": r, + "sender": row[6], + "canonical_alias": row[7], + "name": row[8], + } + ) + + return event_reports, count + + return await self.db_pool.runInteraction( + "get_event_reports_paginate", _get_event_reports_paginate_txn + ) + + async def delete_event_report(self, report_id: int) -> bool: + """Remove an event report from database. + + Args: + report_id: Report to delete + + Returns: + Whether the report was successfully deleted or not. + """ + try: + await self.db_pool.simple_delete_one( + table="event_reports", + keyvalues={"id": report_id}, + desc="delete_event_report", + ) + except StoreError: + # Deletion failed because report does not exist + return False + + return True + class _BackgroundUpdates: REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory" @@ -2139,7 +2337,19 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): reason: Optional[str], content: JsonDict, received_ts: int, - ) -> None: + ) -> int: + """Add an event report + + Args: + room_id: Room that contains the reported event. + event_id: The reported event. + user_id: User who reports the event. + reason: Description that the user specifies. + content: Report request body (score and reason). + received_ts: Time when the user submitted the report (milliseconds). + Returns: + Id of the event report. + """ next_id = self._event_reports_id_gen.get_next() await self.db_pool.simple_insert( table="event_reports", @@ -2154,183 +2364,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): }, desc="add_event_report", ) - - async def get_event_report(self, report_id: int) -> Optional[Dict[str, Any]]: - """Retrieve an event report - - Args: - report_id: ID of reported event in database - Returns: - JSON dict of information from an event report or None if the - report does not exist. - """ - - def _get_event_report_txn( - txn: LoggingTransaction, report_id: int - ) -> Optional[Dict[str, Any]]: - sql = """ - SELECT - er.id, - er.received_ts, - er.room_id, - er.event_id, - er.user_id, - er.content, - events.sender, - room_stats_state.canonical_alias, - room_stats_state.name, - event_json.json AS event_json - FROM event_reports AS er - LEFT JOIN events - ON events.event_id = er.event_id - JOIN event_json - ON event_json.event_id = er.event_id - JOIN room_stats_state - ON room_stats_state.room_id = er.room_id - WHERE er.id = ? - """ - - txn.execute(sql, [report_id]) - row = txn.fetchone() - - if not row: - return None - - event_report = { - "id": row[0], - "received_ts": row[1], - "room_id": row[2], - "event_id": row[3], - "user_id": row[4], - "score": db_to_json(row[5]).get("score"), - "reason": db_to_json(row[5]).get("reason"), - "sender": row[6], - "canonical_alias": row[7], - "name": row[8], - "event_json": db_to_json(row[9]), - } - - return event_report - - return await self.db_pool.runInteraction( - "get_event_report", _get_event_report_txn, report_id - ) - - async def get_event_reports_paginate( - self, - start: int, - limit: int, - direction: Direction = Direction.BACKWARDS, - user_id: Optional[str] = None, - room_id: Optional[str] = None, - ) -> Tuple[List[Dict[str, Any]], int]: - """Retrieve a paginated list of event reports - - Args: - start: event offset to begin the query from - limit: number of rows to retrieve - direction: Whether to fetch the most recent first (backwards) or the - oldest first (forwards) - user_id: search for user_id. Ignored if user_id is None - room_id: search for room_id. Ignored if room_id is None - Returns: - Tuple of: - json list of event reports - total number of event reports matching the filter criteria - """ - - def _get_event_reports_paginate_txn( - txn: LoggingTransaction, - ) -> Tuple[List[Dict[str, Any]], int]: - filters = [] - args: List[object] = [] - - if user_id: - filters.append("er.user_id LIKE ?") - args.extend(["%" + user_id + "%"]) - if room_id: - filters.append("er.room_id LIKE ?") - args.extend(["%" + room_id + "%"]) - - if direction == Direction.BACKWARDS: - order = "DESC" - else: - order = "ASC" - - where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" - - # We join on room_stats_state despite not using any columns from it - # because the join can influence the number of rows returned; - # e.g. a room that doesn't have state, maybe because it was deleted. - # The query returning the total count should be consistent with - # the query returning the results. - sql = """ - SELECT COUNT(*) as total_event_reports - FROM event_reports AS er - JOIN room_stats_state ON room_stats_state.room_id = er.room_id - {} - """.format( - where_clause - ) - txn.execute(sql, args) - count = cast(Tuple[int], txn.fetchone())[0] - - sql = """ - SELECT - er.id, - er.received_ts, - er.room_id, - er.event_id, - er.user_id, - er.content, - events.sender, - room_stats_state.canonical_alias, - room_stats_state.name - FROM event_reports AS er - LEFT JOIN events - ON events.event_id = er.event_id - JOIN room_stats_state - ON room_stats_state.room_id = er.room_id - {where_clause} - ORDER BY er.received_ts {order} - LIMIT ? - OFFSET ? - """.format( - where_clause=where_clause, - order=order, - ) - - args += [limit, start] - txn.execute(sql, args) - - event_reports = [] - for row in txn: - try: - s = db_to_json(row[5]).get("score") - r = db_to_json(row[5]).get("reason") - except Exception: - logger.error("Unable to parse json from event_reports: %s", row[0]) - continue - event_reports.append( - { - "id": row[0], - "received_ts": row[1], - "room_id": row[2], - "event_id": row[3], - "user_id": row[4], - "score": s, - "reason": r, - "sender": row[6], - "canonical_alias": row[7], - "name": row[8], - } - ) - - return event_reports, count - - return await self.db_pool.runInteraction( - "get_event_reports_paginate", _get_event_reports_paginate_txn - ) + return next_id async def block_room(self, room_id: str, user_id: str) -> None: """Marks the room as blocked. diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index c3f2b61bd5..f16a509ac4 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -14,6 +14,7 @@ import logging import re +import unicodedata from typing import ( TYPE_CHECKING, Iterable, @@ -490,6 +491,11 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): values={"display_name": display_name, "avatar_url": avatar_url}, ) + # The display name that goes into the database index. + index_display_name = display_name + if index_display_name is not None: + index_display_name = _filter_text_for_index(index_display_name) + if isinstance(self.database_engine, PostgresEngine): # We weight the localpart most highly, then display name and finally # server name @@ -507,11 +513,15 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, + index_display_name, ), ) elif isinstance(self.database_engine, Sqlite3Engine): - value = "%s %s" % (user_id, display_name) if display_name else user_id + value = ( + "%s %s" % (user_id, index_display_name) + if index_display_name + else user_id + ) self.db_pool.simple_upsert_txn( txn, table="user_directory_search", @@ -896,6 +906,41 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): return {"limited": limited, "results": results[0:limit]} +def _filter_text_for_index(text: str) -> str: + """Transforms text before it is inserted into the user directory index, or searched + for in the user directory index. + + Note that the user directory search table needs to be rebuilt whenever this function + changes. + """ + # Lowercase the text, to make searches case-insensitive. + # This is necessary for both PostgreSQL and SQLite. PostgreSQL's + # `to_tsquery/to_tsvector` functions don't lowercase non-ASCII characters when using + # the "C" collation, while SQLite just doesn't lowercase non-ASCII characters at + # all. + text = text.lower() + + # Normalize the text. NFKC normalization has two effects: + # 1. It canonicalizes the text, ie. maps all visually identical strings to the same + # string. For example, ["e", "◌́"] is mapped to ["é"]. + # 2. It maps strings that are roughly equivalent to the same string. + # For example, ["dž"] is mapped to ["d", "ž"], ["①"] to ["1"] and ["i⁹"] to + # ["i", "9"]. + text = unicodedata.normalize("NFKC", text) + + # Note that nothing is done to make searches accent-insensitive. + # That could be achieved by converting to NFKD form instead (with combining accents + # split out) and filtering out combining accents using `unicodedata.combining(c)`. + # The downside of this may be noisier search results, since search terms with + # explicit accents will match characters with no accents, or completely different + # accents. + # + # text = unicodedata.normalize("NFKD", text) + # text = "".join([c for c in text if not unicodedata.combining(c)]) + + return text + + def _parse_query_sqlite(search_term: str) -> str: """Takes a plain unicode string from the user and converts it into a form that can be passed to database. @@ -905,6 +950,7 @@ def _parse_query_sqlite(search_term: str) -> str: We specifically add both a prefix and non prefix matching term so that exact matches get ranked higher. """ + search_term = _filter_text_for_index(search_term) # Pull out the individual words, discarding any non-word characters. results = _parse_words(search_term) @@ -917,6 +963,8 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]: We use this so that we can add prefix matching, which isn't something that is supported by default. """ + search_term = _filter_text_for_index(search_term) + escaped_words = [] for word in _parse_words(search_term): # Postgres tsvector and tsquery quoting rules: diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 89b1faa6c8..bf4cdfdf29 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Se import attr from synapse.api.constants import EventTypes +from synapse.events import EventBase +from synapse.events.snapshot import UnpersistedEventContext, UnpersistedEventContextBase from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -401,6 +403,123 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): fetched_keys=non_member_types, ) + async def store_state_deltas_for_batched( + self, + events_and_context: List[Tuple[EventBase, UnpersistedEventContextBase]], + room_id: str, + prev_group: int, + ) -> List[Tuple[EventBase, UnpersistedEventContext]]: + """Generate and store state deltas for a group of events and contexts created to be + batch persisted. Note that all the events must be in a linear chain (ie a <- b <- c). + + Args: + events_and_context: the events to generate and store a state groups for + and their associated contexts + room_id: the id of the room the events were created for + prev_group: the state group of the last event persisted before the batched events + were created + """ + + def insert_deltas_group_txn( + txn: LoggingTransaction, + events_and_context: List[Tuple[EventBase, UnpersistedEventContext]], + prev_group: int, + ) -> List[Tuple[EventBase, UnpersistedEventContext]]: + """Generate and store state groups for the provided events and contexts. + + Requires that we have the state as a delta from the last persisted state group. + + Returns: + A list of state groups + """ + is_in_db = self.db_pool.simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + num_state_groups = sum( + 1 for event, _ in events_and_context if event.is_state() + ) + + state_groups = self._state_group_seq_gen.get_next_mult_txn( + txn, num_state_groups + ) + + sg_before = prev_group + state_group_iter = iter(state_groups) + for event, context in events_and_context: + if not event.is_state(): + context.state_group_after_event = sg_before + context.state_group_before_event = sg_before + continue + + sg_after = next(state_group_iter) + context.state_group_after_event = sg_after + context.state_group_before_event = sg_before + context.state_delta_due_to_event = { + (event.type, event.state_key): event.event_id + } + sg_before = sg_after + + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups", + keys=("id", "room_id", "event_id"), + values=[ + (context.state_group_after_event, room_id, event.event_id) + for event, context in events_and_context + if event.is_state() + ], + ) + + self.db_pool.simple_insert_many_txn( + txn, + table="state_group_edges", + keys=("state_group", "prev_state_group"), + values=[ + ( + context.state_group_after_event, + context.state_group_before_event, + ) + for event, context in events_and_context + if event.is_state() + ], + ) + + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), + values=[ + ( + context.state_group_after_event, + room_id, + key[0], + key[1], + state_id, + ) + for event, context in events_and_context + if context.state_delta_due_to_event is not None + for key, state_id in context.state_delta_due_to_event.items() + ], + ) + return events_and_context + + return await self.db_pool.runInteraction( + "store_state_deltas_for_batched.insert_deltas_group", + insert_deltas_group_txn, + events_and_context, + prev_group, + ) + async def store_state_group( self, event_id: str, diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 9adff3f4f5..334d3d718b 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -158,6 +158,15 @@ class AbstractStreamIdGenerator(AbstractStreamIdTracker): """ raise NotImplementedError() + @abc.abstractmethod + def get_next_txn(self, txn: LoggingTransaction) -> int: + """ + Usage: + stream_id_gen.get_next_txn(txn) + # ... persist events ... + """ + raise NotImplementedError() + class StreamIdGenerator(AbstractStreamIdGenerator): """Generates and tracks stream IDs for a stream with a single writer. @@ -263,6 +272,40 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return _AsyncCtxManagerWrapper(manager()) + def get_next_txn(self, txn: LoggingTransaction) -> int: + """ + Retrieve the next stream ID from within a database transaction. + + Clean-up functions will be called when the transaction finishes. + + Args: + txn: The database transaction object. + + Returns: + The next stream ID. + """ + if not self._is_writer: + raise Exception("Tried to allocate stream ID on non-writer") + + # Get the next stream ID. + with self._lock: + self._current += self._step + next_id = self._current + + self._unfinished_ids[next_id] = next_id + + def clear_unfinished_id(id_to_clear: int) -> None: + """A function to mark processing this ID as finished""" + with self._lock: + self._unfinished_ids.pop(id_to_clear) + + # Mark this ID as finished once the database transaction itself finishes. + txn.call_after(clear_unfinished_id, next_id) + txn.call_on_exception(clear_unfinished_id, next_id) + + # Return the new ID. + return next_id + def get_current_token(self) -> int: if not self._is_writer: return self._current @@ -568,7 +611,7 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): """ Usage: - stream_id = stream_id_gen.get_next(txn) + stream_id = stream_id_gen.get_next_txn(txn) # ... persist event ... """ diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py index 75268cbe15..80915216de 100644 --- a/synapse/storage/util/sequence.py +++ b/synapse/storage/util/sequence.py @@ -205,7 +205,7 @@ class LocalSequenceGenerator(SequenceGenerator): """ Args: get_first_callback: a callback which is called on the first call to - get_next_id_txn; should return the curreent maximum id + get_next_id_txn; should return the current maximum id """ # the callback. this is cleared after it is called, so that it can be GCed. self._callback: Optional[GetFirstCallbackType] = get_first_callback diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py index 69d384442f..9691d66b48 100644 --- a/tests/handlers/test_message.py +++ b/tests/handlers/test_message.py @@ -18,7 +18,7 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer @@ -79,7 +79,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase): return memberEvent, memberEventContext - def _create_duplicate_event(self, txn_id: str) -> Tuple[EventBase, EventContext]: + def _create_duplicate_event( + self, txn_id: str + ) -> Tuple[EventBase, UnpersistedEventContextBase]: """Create a new event with the given transaction ID. All events produced by this method will be considered duplicates. """ @@ -107,7 +109,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase): txn_id = "something_suitably_random" - event1, context = self._create_duplicate_event(txn_id) + event1, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event1)) ret_event1 = self.get_success( self.handler.handle_new_client_event( @@ -119,7 +122,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase): self.assertEqual(event1.event_id, ret_event1.event_id) - event2, context = self._create_duplicate_event(txn_id) + event2, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event2)) # We want to test that the deduplication at the persit event end works, # so we want to make sure we test with different events. @@ -140,7 +144,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase): # Let's test that calling `persist_event` directly also does the right # thing. - event3, context = self._create_duplicate_event(txn_id) + event3, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event3)) + self.assertNotEqual(event1.event_id, event3.event_id) ret_event3, event_pos3, _ = self.get_success( @@ -154,7 +160,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase): # Let's test that calling `persist_events` directly also does the right # thing. - event4, context = self._create_duplicate_event(txn_id) + event4, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event4)) self.assertNotEqual(event1.event_id, event3.event_id) events, _ = self.get_success( @@ -174,8 +181,10 @@ class EventCreationTestCase(unittest.HomeserverTestCase): txn_id = "something_else_suitably_random" # Create two duplicate events to persist at the same time - event1, context1 = self._create_duplicate_event(txn_id) - event2, context2 = self._create_duplicate_event(txn_id) + event1, unpersisted_context1 = self._create_duplicate_event(txn_id) + context1 = self.get_success(unpersisted_context1.persist(event1)) + event2, unpersisted_context2 = self._create_duplicate_event(txn_id) + context2 = self.get_success(unpersisted_context2.persist(event2)) # Ensure their event IDs are different to start with self.assertNotEqual(event1.event_id, event2.event_id) diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 1db99b3c00..aff1ec4758 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -507,7 +507,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase): # Lower the permissions of the inviter. event_creation_handler = self.hs.get_event_creation_handler() requester = create_requester(inviter) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_creation_handler.create_event( requester, { @@ -519,6 +519,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase): }, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( event_creation_handler.handle_new_client_event( requester, events_and_context=[(event, context)] diff --git a/tests/rest/media/v1/__init__.py b/tests/media/__init__.py index b1ee10cfcc..68910cbf5b 100644 --- a/tests/rest/media/v1/__init__.py +++ b/tests/media/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2018 New Vector Ltd +# Copyright 2023 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/rest/media/v1/test_base.py b/tests/media/test_base.py index c73179151a..66498c744d 100644 --- a/tests/rest/media/v1/test_base.py +++ b/tests/media/test_base.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.rest.media.v1._base import get_filename_from_headers +from synapse.media._base import get_filename_from_headers from tests import unittest diff --git a/tests/rest/media/v1/test_filepath.py b/tests/media/test_filepath.py index 43e6f0f70a..95e3b83d5a 100644 --- a/tests/rest/media/v1/test_filepath.py +++ b/tests/media/test_filepath.py @@ -15,7 +15,7 @@ import inspect import os from typing import Iterable -from synapse.rest.media.v1.filepath import MediaFilePaths, _wrap_with_jail_check +from synapse.media.filepath import MediaFilePaths, _wrap_with_jail_check from tests import unittest diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/media/test_html_preview.py index 1062081a06..e7da75db3e 100644 --- a/tests/rest/media/v1/test_html_preview.py +++ b/tests/media/test_html_preview.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.rest.media.v1.preview_html import ( +from synapse.media.preview_html import ( _get_html_media_encodings, decode_body, parse_html_to_open_graph, diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/media/test_media_storage.py index 8ed27179c4..870047d0f2 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -34,13 +34,13 @@ from synapse.events import EventBase from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.http.types import QueryParams from synapse.logging.context import make_deferred_yieldable +from synapse.media._base import FileInfo +from synapse.media.filepath import MediaFilePaths +from synapse.media.media_storage import MediaStorage, ReadableFileWrapper +from synapse.media.storage_provider import FileStorageProviderBackend from synapse.module_api import ModuleApi from synapse.rest import admin from synapse.rest.client import login -from synapse.rest.media.v1._base import FileInfo -from synapse.rest.media.v1.filepath import MediaFilePaths -from synapse.rest.media.v1.media_storage import MediaStorage, ReadableFileWrapper -from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend from synapse.server import HomeServer from synapse.types import JsonDict, RoomAlias from synapse.util import Clock @@ -253,7 +253,7 @@ class MediaRepoTests(unittest.HomeserverTestCase): config["max_image_pixels"] = 2000000 provider_config = { - "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend", + "module": "synapse.media.storage_provider.FileStorageProviderBackend", "store_local": True, "store_synchronous": False, "store_remote": True, diff --git a/tests/rest/media/v1/test_oembed.py b/tests/media/test_oembed.py index 3f7f1dbab9..c8bf8421da 100644 --- a/tests/rest/media/v1/test_oembed.py +++ b/tests/media/test_oembed.py @@ -18,7 +18,7 @@ from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor -from synapse.rest.media.v1.oembed import OEmbedProvider, OEmbedResult +from synapse.media.oembed import OEmbedProvider, OEmbedResult from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index dce6899e78..73fecfd4ad 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -130,7 +130,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): # Create a new message event, and try to evaluate it under the dodgy # power level event. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -145,6 +145,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): prev_event_ids=[pl_event_id], ) ) + context = self.get_success(unpersisted_context.persist(event)) bulk_evaluator = BulkPushRuleEvaluator(self.hs) # should not raise @@ -170,7 +171,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): """Ensure that push rules are not calculated when disabled in the config""" # Create a new message event which should cause a notification. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -184,6 +185,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): }, ) ) + context = self.get_success(unpersisted_context.persist(event)) bulk_evaluator = BulkPushRuleEvaluator(self.hs) # Mock the method which calculates push rules -- we do this instead of @@ -200,7 +202,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) -> bool: """Returns true iff the `mentions` trigger an event push action.""" # Create a new message event which should cause a notification. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -211,7 +213,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): }, ) ) - + context = self.get_success(unpersisted_context.persist(event)) # Execute the push rule machinery. self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) @@ -231,6 +233,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): "experimental_features": { "msc3758_exact_event_match": True, "msc3952_intentional_mentions": True, + "msc3966_exact_event_property_contains": True, } } ) @@ -334,6 +337,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): "experimental_features": { "msc3758_exact_event_match": True, "msc3952_intentional_mentions": True, + "msc3966_exact_event_property_contains": True, } } ) @@ -390,7 +394,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): bulk_evaluator = BulkPushRuleEvaluator(self.hs) # Create & persist an event to use as the parent of the relation. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -404,6 +408,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): }, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self.event_creation_handler.handle_new_client_event( self.requester, events_and_context=[(event, context)] diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 0a3aca5c50..4ea5472eb4 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -369,10 +369,8 @@ class EmailPusherTests(HomeserverTestCase): # disassociate the user's email address self.get_success( - self.auth_handler.delete_threepid( - user_id=self.user_id, - medium="email", - address="a@example.com", + self.auth_handler.delete_local_threepid( + user_id=self.user_id, medium="email", address="a@example.com" ) ) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 4e858fd16f..d4a4bc4d93 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Optional, Set, Union, cast +from typing import Any, Dict, List, Optional, Union, cast import frozendict @@ -147,8 +147,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): self, content: JsonMapping, *, - has_mentions: bool = False, - user_mentions: Optional[Set[str]] = None, related_events: Optional[JsonDict] = None, ) -> PushRuleEvaluator: event = FrozenEvent( @@ -167,8 +165,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): power_levels: Dict[str, Union[int, Dict[str, int]]] = {} return PushRuleEvaluator( _flatten_dict(event), - has_mentions, - user_mentions or set(), + False, room_member_count, sender_power_level, cast(Dict[str, int], power_levels.get("notifications", {})), @@ -204,32 +201,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): # A display name with spaces should work fine. self.assertTrue(evaluator.matches(condition, "@user:test", "foo bar")) - def test_user_mentions(self) -> None: - """Check for user mentions.""" - condition = {"kind": "org.matrix.msc3952.is_user_mention"} - - # No mentions shouldn't match. - evaluator = self._get_evaluator({}, has_mentions=True) - self.assertFalse(evaluator.matches(condition, "@user:test", None)) - - # An empty set shouldn't match - evaluator = self._get_evaluator({}, has_mentions=True, user_mentions=set()) - self.assertFalse(evaluator.matches(condition, "@user:test", None)) - - # The Matrix ID appearing anywhere in the mentions list should match - evaluator = self._get_evaluator( - {}, has_mentions=True, user_mentions={"@user:test"} - ) - self.assertTrue(evaluator.matches(condition, "@user:test", None)) - - evaluator = self._get_evaluator( - {}, has_mentions=True, user_mentions={"@another:test", "@user:test"} - ) - self.assertTrue(evaluator.matches(condition, "@user:test", None)) - - # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions - # since the BulkPushRuleEvaluator is what handles data sanitisation. - def _assert_matches( self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None ) -> None: @@ -401,6 +372,33 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "pattern should not match before a newline", ) + def test_event_match_pattern(self) -> None: + """Check that event_match conditions do not use a "pattern_type" from user data.""" + + # The pattern_type should not be deserialized into anything valid. + condition = { + "kind": "event_match", + "key": "content.value", + "pattern_type": "user_id", + } + self._assert_not_matches( + condition, + {"value": "@user:test"}, + "should not be possible to pass a pattern_type in", + ) + + # This is an internal-only condition which shouldn't get deserialized. + condition = { + "kind": "event_match_type", + "key": "content.value", + "pattern_type": "user_id", + } + self._assert_not_matches( + condition, + {"value": "@user:test"}, + "should not be possible to pass a pattern_type in", + ) + def test_exact_event_match_string(self) -> None: """Check that exact_event_match conditions work as expected for strings.""" diff --git a/tests/replication/tcp/test_handler.py b/tests/replication/tcp/test_handler.py index bf927beb6a..bab77b2df7 100644 --- a/tests/replication/tcp/test_handler.py +++ b/tests/replication/tcp/test_handler.py @@ -141,3 +141,64 @@ class ChannelsTestCase(BaseMultiWorkerStreamTestCase): self.get_success(ctx_worker1.__aexit__(None, None, None)) self.assertTrue(d.called) + + def test_wait_for_stream_position_rdata(self) -> None: + """Check that wait for stream position correctly waits for an update + from the correct instance, when RDATA is sent. + """ + store = self.hs.get_datastores().main + cmd_handler = self.hs.get_replication_command_handler() + data_handler = self.hs.get_replication_data_handler() + + worker1 = self.make_worker_hs( + "synapse.app.generic_worker", + extra_config={ + "worker_name": "worker1", + "run_background_tasks_on": "worker1", + "redis": {"enabled": True}, + }, + ) + + cache_id_gen = worker1.get_datastores().main._cache_id_gen + assert cache_id_gen is not None + + self.replicate() + + # First, make sure the master knows that `worker1` exists. + initial_token = cache_id_gen.get_current_token() + cmd_handler.send_command( + PositionCommand("caches", "worker1", initial_token, initial_token) + ) + self.replicate() + + # `wait_for_stream_position` should only return once master receives a + # notification that `next_token2` has persisted. + ctx_worker1 = cache_id_gen.get_next_mult(2) + next_token1, next_token2 = self.get_success(ctx_worker1.__aenter__()) + + d = defer.ensureDeferred( + data_handler.wait_for_stream_position("worker1", "caches", next_token2) + ) + self.assertFalse(d.called) + + # Insert an entry into the cache stream with token `next_token1`, but + # not `next_token2`. + self.get_success( + store.db_pool.simple_insert( + table="cache_invalidation_stream_by_instance", + values={ + "stream_id": next_token1, + "instance_name": "worker1", + "cache_func": "foo", + "keys": [], + "invalidation_ts": 0, + }, + ) + ) + + # Finish the context manager, triggering the data to be sent to master. + self.get_success(ctx_worker1.__aexit__(None, None, None)) + + # Master should get told about `next_token2`, so the deferred should + # resolve. + self.assertTrue(d.called) diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py index 233eba3516..f189b07769 100644 --- a/tests/rest/admin/test_event_reports.py +++ b/tests/rest/admin/test_event_reports.py @@ -78,7 +78,7 @@ class EventReportsTestCase(unittest.HomeserverTestCase): """ Try to get an event report without authentication. """ - channel = self.make_request("GET", self.url, b"{}") + channel = self.make_request("GET", self.url, {}) self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) @@ -473,7 +473,7 @@ class EventReportDetailTestCase(unittest.HomeserverTestCase): """ Try to get event report without authentication. """ - channel = self.make_request("GET", self.url, b"{}") + channel = self.make_request("GET", self.url, {}) self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) @@ -599,3 +599,142 @@ class EventReportDetailTestCase(unittest.HomeserverTestCase): self.assertIn("room_id", content["event_json"]) self.assertIn("sender", content["event_json"]) self.assertIn("content", content["event_json"]) + + +class DeleteEventReportTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self._store = hs.get_datastores().main + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.other_user_tok = self.login("user", "pass") + + # create report + event_id = self.get_success( + self._store.add_event_report( + "room_id", + "event_id", + self.other_user, + "this makes me sad", + {}, + self.clock.time_msec(), + ) + ) + + self.url = f"/_synapse/admin/v1/event_reports/{event_id}" + + def test_no_auth(self) -> None: + """ + Try to delete event report without authentication. + """ + channel = self.make_request("DELETE", self.url) + + self.assertEqual(401, channel.code, msg=channel.json_body) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self) -> None: + """ + If the user is not a server admin, an error 403 is returned. + """ + + channel = self.make_request( + "DELETE", + self.url, + access_token=self.other_user_tok, + ) + + self.assertEqual(403, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_delete_success(self) -> None: + """ + Testing delete a report. + """ + + channel = self.make_request( + "DELETE", + self.url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual({}, channel.json_body) + + channel = self.make_request( + "GET", + self.url, + access_token=self.admin_user_tok, + ) + + # check that report was deleted + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_invalid_report_id(self) -> None: + """ + Testing that an invalid `report_id` returns a 400. + """ + + # `report_id` is negative + channel = self.make_request( + "DELETE", + "/_synapse/admin/v1/event_reports/-123", + access_token=self.admin_user_tok, + ) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "The report_id parameter must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # `report_id` is a non-numerical string + channel = self.make_request( + "DELETE", + "/_synapse/admin/v1/event_reports/abcdef", + access_token=self.admin_user_tok, + ) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "The report_id parameter must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # `report_id` is undefined + channel = self.make_request( + "DELETE", + "/_synapse/admin/v1/event_reports/", + access_token=self.admin_user_tok, + ) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "The report_id parameter must be a string representing a positive integer.", + channel.json_body["error"], + ) + + def test_report_id_not_found(self) -> None: + """ + Testing that a not existing `report_id` returns a 404. + """ + + channel = self.make_request( + "DELETE", + "/_synapse/admin/v1/event_reports/123", + access_token=self.admin_user_tok, + ) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + self.assertEqual("Event report not found", channel.json_body["error"]) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index f41319a5b6..6d04911d67 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -20,8 +20,8 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin from synapse.api.errors import Codes +from synapse.media.filepath import MediaFilePaths from synapse.rest.client import login, profile, room -from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.server import HomeServer from synapse.util import Clock diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index f5b213219f..4b8f889a71 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -28,8 +28,8 @@ import synapse.rest.admin from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError from synapse.api.room_versions import RoomVersions +from synapse.media.filepath import MediaFilePaths from synapse.rest.client import devices, login, logout, profile, register, room, sync -from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.server import HomeServer from synapse.types import JsonDict, UserID, create_requester from synapse.util import Clock diff --git a/tests/rest/client/test_keys.py b/tests/rest/client/test_keys.py index 741fecea77..8ee5489057 100644 --- a/tests/rest/client/test_keys.py +++ b/tests/rest/client/test_keys.py @@ -14,12 +14,21 @@ from http import HTTPStatus +from signedjson.key import ( + encode_verify_key_base64, + generate_signing_key, + get_verify_key, +) +from signedjson.sign import sign_json + from synapse.api.errors import Codes from synapse.rest import admin from synapse.rest.client import keys, login +from synapse.types import JsonDict from tests import unittest from tests.http.server._base import make_request_with_cancellation_test +from tests.unittest import override_config class KeyQueryTestCase(unittest.HomeserverTestCase): @@ -118,3 +127,135 @@ class KeyQueryTestCase(unittest.HomeserverTestCase): self.assertEqual(200, channel.code, msg=channel.result["body"]) self.assertIn(bob, channel.json_body["device_keys"]) + + def make_device_keys(self, user_id: str, device_id: str) -> JsonDict: + # We only generate a master key to simplify the test. + master_signing_key = generate_signing_key(device_id) + master_verify_key = encode_verify_key_base64(get_verify_key(master_signing_key)) + + return { + "master_key": sign_json( + { + "user_id": user_id, + "usage": ["master"], + "keys": {"ed25519:" + master_verify_key: master_verify_key}, + }, + user_id, + master_signing_key, + ), + } + + def test_device_signing_with_uia(self) -> None: + """Device signing key upload requires UIA.""" + password = "wonderland" + device_id = "ABCDEFGHI" + alice_id = self.register_user("alice", password) + alice_token = self.login("alice", password, device_id=device_id) + + content = self.make_device_keys(alice_id, device_id) + + channel = self.make_request( + "POST", + "/_matrix/client/v3/keys/device_signing/upload", + content, + alice_token, + ) + + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) + # Grab the session + session = channel.json_body["session"] + # Ensure that flows are what is expected. + self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"]) + + # add UI auth + content["auth"] = { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": alice_id}, + "password": password, + "session": session, + } + + channel = self.make_request( + "POST", + "/_matrix/client/v3/keys/device_signing/upload", + content, + alice_token, + ) + + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + @override_config({"ui_auth": {"session_timeout": "15m"}}) + def test_device_signing_with_uia_session_timeout(self) -> None: + """Device signing key upload requires UIA buy passes with grace period.""" + password = "wonderland" + device_id = "ABCDEFGHI" + alice_id = self.register_user("alice", password) + alice_token = self.login("alice", password, device_id=device_id) + + content = self.make_device_keys(alice_id, device_id) + + channel = self.make_request( + "POST", + "/_matrix/client/v3/keys/device_signing/upload", + content, + alice_token, + ) + + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + @override_config( + { + "experimental_features": {"msc3967_enabled": True}, + "ui_auth": {"session_timeout": "15s"}, + } + ) + def test_device_signing_with_msc3967(self) -> None: + """Device signing key follows MSC3967 behaviour when enabled.""" + password = "wonderland" + device_id = "ABCDEFGHI" + alice_id = self.register_user("alice", password) + alice_token = self.login("alice", password, device_id=device_id) + + keys1 = self.make_device_keys(alice_id, device_id) + + # Initial request should succeed as no existing keys are present. + channel = self.make_request( + "POST", + "/_matrix/client/v3/keys/device_signing/upload", + keys1, + alice_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + keys2 = self.make_device_keys(alice_id, device_id) + + # Subsequent request should require UIA as keys already exist even though session_timeout is set. + channel = self.make_request( + "POST", + "/_matrix/client/v3/keys/device_signing/upload", + keys2, + alice_token, + ) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) + + # Grab the session + session = channel.json_body["session"] + # Ensure that flows are what is expected. + self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"]) + + # add UI auth + keys2["auth"] = { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": alice_id}, + "password": password, + "session": session, + } + + # Request should complete + channel = self.make_request( + "POST", + "/_matrix/client/v3/keys/device_signing/upload", + keys2, + alice_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index c8a6911d5e..a8a0a16141 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -1080,48 +1080,6 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): ] assert_bundle(self._find_event_in_chunk(chunk)) - def test_annotation(self) -> None: - """ - Test that annotations get correctly bundled. - """ - # Setup by sending a variety of relations. - self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") - self._send_relation( - RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token - ) - self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b") - - def assert_annotations(bundled_aggregations: JsonDict) -> None: - self.assertEqual( - { - "chunk": [ - {"type": "m.reaction", "key": "a", "count": 2}, - {"type": "m.reaction", "key": "b", "count": 1}, - ] - }, - bundled_aggregations, - ) - - self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 7) - - def test_annotation_to_annotation(self) -> None: - """Any relation to an annotation should be ignored.""" - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") - event_id = channel.json_body["event_id"] - self._send_relation( - RelationTypes.ANNOTATION, "m.reaction", "b", parent_id=event_id - ) - - # Fetch the initial annotation event to see if it has bundled aggregations. - channel = self.make_request( - "GET", - f"/_matrix/client/v3/rooms/{self.room}/event/{event_id}", - access_token=self.user_token, - ) - self.assertEquals(200, channel.code, channel.json_body) - # The first annotationt should not have any bundled aggregations. - self.assertNotIn("m.relations", channel.json_body["unsigned"]) - def test_reference(self) -> None: """ Test that references get correctly bundled. @@ -1138,7 +1096,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6) def test_thread(self) -> None: """ @@ -1183,7 +1141,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): # The "user" sent the root event and is making queries for the bundled # aggregations: they have participated. - self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 7) + self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 6) # The "user2" sent replies in the thread and is making queries for the # bundled aggregations: they have participated. # @@ -1208,9 +1166,10 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): channel = self._send_relation(RelationTypes.THREAD, "m.room.test") thread_2 = channel.json_body["event_id"] - self._send_relation( - RelationTypes.ANNOTATION, "m.reaction", "a", parent_id=thread_2 + channel = self._send_relation( + RelationTypes.REFERENCE, "org.matrix.test", parent_id=thread_2 ) + reference_event_id = channel.json_body["event_id"] def assert_thread(bundled_aggregations: JsonDict) -> None: self.assertEqual(2, bundled_aggregations.get("count")) @@ -1235,17 +1194,15 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): self.assert_dict( { "m.relations": { - RelationTypes.ANNOTATION: { - "chunk": [ - {"type": "m.reaction", "key": "a", "count": 1}, - ] + RelationTypes.REFERENCE: { + "chunk": [{"event_id": reference_event_id}] }, } }, bundled_aggregations["latest_event"].get("unsigned"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 7) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 6) def test_nested_thread(self) -> None: """ @@ -1363,10 +1320,11 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): channel = self._send_relation(RelationTypes.THREAD, "m.room.test") thread_id = channel.json_body["event_id"] - # Annotate the thread. - self._send_relation( - RelationTypes.ANNOTATION, "m.reaction", "a", parent_id=thread_id + # Make a reference to the thread. + channel = self._send_relation( + RelationTypes.REFERENCE, "org.matrix.test", parent_id=thread_id ) + reference_event_id = channel.json_body["event_id"] channel = self.make_request( "GET", @@ -1377,9 +1335,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): self.assertEqual( channel.json_body["unsigned"].get("m.relations"), { - RelationTypes.ANNOTATION: { - "chunk": [{"count": 1, "key": "a", "type": "m.reaction"}] - }, + RelationTypes.REFERENCE: {"chunk": [{"event_id": reference_event_id}]}, }, ) @@ -1396,9 +1352,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): self.assertEqual( thread_message["unsigned"].get("m.relations"), { - RelationTypes.ANNOTATION: { - "chunk": [{"count": 1, "key": "a", "type": "m.reaction"}] - }, + RelationTypes.REFERENCE: {"chunk": [{"event_id": reference_event_id}]}, }, ) @@ -1410,7 +1364,8 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): Note that the spec allows for a server to return additional fields beyond what is specified. """ - self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + channel = self._send_relation(RelationTypes.REFERENCE, "org.matrix.test") + reference_event_id = channel.json_body["event_id"] # Note that the sync filter does not include "unsigned" as a field. filter = urllib.parse.quote_plus( @@ -1428,7 +1383,12 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): # Ensure there's bundled aggregations on it. self.assertIn("unsigned", parent_event) - self.assertIn("m.relations", parent_event["unsigned"]) + self.assertEqual( + parent_event["unsigned"].get("m.relations"), + { + RelationTypes.REFERENCE: {"chunk": [{"event_id": reference_event_id}]}, + }, + ) class RelationIgnoredUserTestCase(BaseRelationsTestCase): @@ -1475,53 +1435,8 @@ class RelationIgnoredUserTestCase(BaseRelationsTestCase): return before_aggregations[relation_type], after_aggregations[relation_type] - def test_annotation(self) -> None: - """Annotations should ignore""" - # Send 2 from us, 2 from the to be ignored user. - allowed_event_ids = [] - ignored_event_ids = [] - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") - allowed_event_ids.append(channel.json_body["event_id"]) - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="b") - allowed_event_ids.append(channel.json_body["event_id"]) - channel = self._send_relation( - RelationTypes.ANNOTATION, - "m.reaction", - key="a", - access_token=self.user2_token, - ) - ignored_event_ids.append(channel.json_body["event_id"]) - channel = self._send_relation( - RelationTypes.ANNOTATION, - "m.reaction", - key="c", - access_token=self.user2_token, - ) - ignored_event_ids.append(channel.json_body["event_id"]) - - before_aggregations, after_aggregations = self._test_ignored_user( - RelationTypes.ANNOTATION, allowed_event_ids, ignored_event_ids - ) - - self.assertCountEqual( - before_aggregations["chunk"], - [ - {"type": "m.reaction", "key": "a", "count": 2}, - {"type": "m.reaction", "key": "b", "count": 1}, - {"type": "m.reaction", "key": "c", "count": 1}, - ], - ) - - self.assertCountEqual( - after_aggregations["chunk"], - [ - {"type": "m.reaction", "key": "a", "count": 1}, - {"type": "m.reaction", "key": "b", "count": 1}, - ], - ) - def test_reference(self) -> None: - """Annotations should ignore""" + """Aggregations should exclude reference relations from ignored users""" channel = self._send_relation(RelationTypes.REFERENCE, "m.room.test") allowed_event_ids = [channel.json_body["event_id"]] @@ -1544,7 +1459,7 @@ class RelationIgnoredUserTestCase(BaseRelationsTestCase): ) def test_thread(self) -> None: - """Annotations should ignore""" + """Aggregations should exclude thread releations from ignored users""" channel = self._send_relation(RelationTypes.THREAD, "m.room.test") allowed_event_ids = [channel.json_body["event_id"]] @@ -1618,43 +1533,6 @@ class RelationRedactionTestCase(BaseRelationsTestCase): for t in threads ] - def test_redact_relation_annotation(self) -> None: - """ - Test that annotations of an event are properly handled after the - annotation is redacted. - - The redacted relation should not be included in bundled aggregations or - the response to relations. - """ - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") - to_redact_event_id = channel.json_body["event_id"] - - channel = self._send_relation( - RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token - ) - unredacted_event_id = channel.json_body["event_id"] - - # Both relations should exist. - event_ids = self._get_related_events() - relations = self._get_bundled_aggregations() - self.assertCountEqual(event_ids, [to_redact_event_id, unredacted_event_id]) - self.assertEquals( - relations["m.annotation"], - {"chunk": [{"type": "m.reaction", "key": "a", "count": 2}]}, - ) - - # Redact one of the reactions. - self._redact(to_redact_event_id) - - # The unredacted relation should still exist. - event_ids = self._get_related_events() - relations = self._get_bundled_aggregations() - self.assertEquals(event_ids, [unredacted_event_id]) - self.assertEquals( - relations["m.annotation"], - {"chunk": [{"type": "m.reaction", "key": "a", "count": 1}]}, - ) - def test_redact_relation_thread(self) -> None: """ Test that thread replies are properly handled after the thread reply redacted. @@ -1775,14 +1653,14 @@ class RelationRedactionTestCase(BaseRelationsTestCase): is redacted. """ # Add a relation - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="👍") + channel = self._send_relation(RelationTypes.REFERENCE, "org.matrix.test") related_event_id = channel.json_body["event_id"] # The relations should exist. event_ids = self._get_related_events() relations = self._get_bundled_aggregations() self.assertEqual(len(event_ids), 1) - self.assertIn(RelationTypes.ANNOTATION, relations) + self.assertIn(RelationTypes.REFERENCE, relations) # Redact the original event. self._redact(self.parent_id) @@ -1792,8 +1670,8 @@ class RelationRedactionTestCase(BaseRelationsTestCase): relations = self._get_bundled_aggregations() self.assertEquals(event_ids, [related_event_id]) self.assertEquals( - relations["m.annotation"], - {"chunk": [{"type": "m.reaction", "key": "👍", "count": 1}]}, + relations[RelationTypes.REFERENCE], + {"chunk": [{"event_id": related_event_id}]}, ) def test_redact_parent_thread(self) -> None: diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 4dd763096d..a4900703c4 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -713,7 +713,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(33, channel.resource_usage.db_txn_count) + self.assertEqual(30, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -726,7 +726,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(36, channel.resource_usage.db_txn_count) + self.assertEqual(32, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index c0f93f898a..3b99513707 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -934,3 +934,124 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): # Check that the mock was called with the right parameters self.assertEqual(args, (user_id, "email", "foo@example.com")) + + def test_on_add_and_remove_user_third_party_identifier(self) -> None: + """Tests that the on_add_user_third_party_identifier and + on_remove_user_third_party_identifier module callbacks are called + just before associating and removing a 3PID to/from an account. + """ + # Pretend to be a Synapse module and register both callbacks as mocks. + third_party_rules = self.hs.get_third_party_event_rules() + on_add_user_third_party_identifier_callback_mock = Mock( + return_value=make_awaitable(None) + ) + on_remove_user_third_party_identifier_callback_mock = Mock( + return_value=make_awaitable(None) + ) + third_party_rules._on_threepid_bind_callbacks.append( + on_add_user_third_party_identifier_callback_mock + ) + third_party_rules._on_threepid_bind_callbacks.append( + on_remove_user_third_party_identifier_callback_mock + ) + + # Register an admin user. + self.register_user("admin", "password", admin=True) + admin_tok = self.login("admin", "password") + + # Also register a normal user we can modify. + user_id = self.register_user("user", "password") + + # Add a 3PID to the user. + channel = self.make_request( + "PUT", + "/_synapse/admin/v2/users/%s" % user_id, + { + "threepids": [ + { + "medium": "email", + "address": "foo@example.com", + }, + ], + }, + access_token=admin_tok, + ) + + # Check that the mocked add callback was called with the appropriate + # 3PID details. + self.assertEqual(channel.code, 200, channel.json_body) + on_add_user_third_party_identifier_callback_mock.assert_called_once() + args = on_add_user_third_party_identifier_callback_mock.call_args[0] + self.assertEqual(args, (user_id, "email", "foo@example.com")) + + # Now remove the 3PID from the user + channel = self.make_request( + "PUT", + "/_synapse/admin/v2/users/%s" % user_id, + { + "threepids": [], + }, + access_token=admin_tok, + ) + + # Check that the mocked remove callback was called with the appropriate + # 3PID details. + self.assertEqual(channel.code, 200, channel.json_body) + on_remove_user_third_party_identifier_callback_mock.assert_called_once() + args = on_remove_user_third_party_identifier_callback_mock.call_args[0] + self.assertEqual(args, (user_id, "email", "foo@example.com")) + + def test_on_remove_user_third_party_identifier_is_called_on_deactivate( + self, + ) -> None: + """Tests that the on_remove_user_third_party_identifier module callback is called + when a user is deactivated and their third-party ID associations are deleted. + """ + # Pretend to be a Synapse module and register both callbacks as mocks. + third_party_rules = self.hs.get_third_party_event_rules() + on_remove_user_third_party_identifier_callback_mock = Mock( + return_value=make_awaitable(None) + ) + third_party_rules._on_threepid_bind_callbacks.append( + on_remove_user_third_party_identifier_callback_mock + ) + + # Register an admin user. + self.register_user("admin", "password", admin=True) + admin_tok = self.login("admin", "password") + + # Also register a normal user we can modify. + user_id = self.register_user("user", "password") + + # Add a 3PID to the user. + channel = self.make_request( + "PUT", + "/_synapse/admin/v2/users/%s" % user_id, + { + "threepids": [ + { + "medium": "email", + "address": "foo@example.com", + }, + ], + }, + access_token=admin_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Now deactivate the user. + channel = self.make_request( + "PUT", + "/_synapse/admin/v2/users/%s" % user_id, + { + "deactivated": True, + }, + access_token=admin_tok, + ) + + # Check that the mocked remove callback was called with the appropriate + # 3PID details. + self.assertEqual(channel.code, 200, channel.json_body) + on_remove_user_third_party_identifier_callback_mock.assert_called_once() + args = on_remove_user_third_party_identifier_callback_mock.call_args[0] + self.assertEqual(args, (user_id, "email", "foo@example.com")) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/test_url_preview.py index 2acfccec61..e91dc581c2 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/test_url_preview.py @@ -26,8 +26,8 @@ from twisted.internet.interfaces import IAddress, IResolutionReceiver from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor from synapse.config.oembed import OEmbedEndpointConfig -from synapse.rest.media.v1.media_repository import MediaRepositoryResource -from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS +from synapse.rest.media.media_repository_resource import MediaRepositoryResource +from synapse.rest.media.preview_url_resource import IMAGE_CACHE_EXPIRY_MS from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock @@ -82,7 +82,7 @@ class URLPreviewTests(unittest.HomeserverTestCase): config["media_store_path"] = self.media_store_path provider_config = { - "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend", + "module": "synapse.media.storage_provider.FileStorageProviderBackend", "store_local": True, "store_synchronous": False, "store_remote": True, diff --git a/tests/storage/test_account_data.py b/tests/storage/test_account_data.py index 1bfd11ceae..b12691a9d3 100644 --- a/tests/storage/test_account_data.py +++ b/tests/storage/test_account_data.py @@ -140,3 +140,25 @@ class IgnoredUsersTestCase(unittest.HomeserverTestCase): # No one ignores the user now. self.assert_ignored(self.user, set()) self.assert_ignorers("@other:test", set()) + + def test_ignoring_users_with_latest_stream_ids(self) -> None: + """Test that ignoring users updates the latest stream ID for the ignored + user list account data.""" + + def get_latest_ignore_streampos(user_id: str) -> Optional[int]: + return self.get_success( + self.store.get_latest_stream_id_for_global_account_data_by_type_for_user( + user_id, AccountDataTypes.IGNORED_USER_LIST + ) + ) + + self.assertIsNone(get_latest_ignore_streampos("@user:test")) + + self._update_ignore_list("@other:test", "@another:remote") + + self.assertEqual(get_latest_ignore_streampos("@user:test"), 2) + + # Add one user, remove one user, and leave one user. + self._update_ignore_list("@foo:test", "@another:remote") + + self.assertEqual(get_latest_ignore_streampos("@user:test"), 3) diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 73d11e7786..e39b63edac 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -522,7 +522,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): latest_event_ids = self.get_success( self.store.get_prev_events_for_room(room_id) ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_handler.create_event( self.requester, { @@ -535,6 +535,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): prev_event_ids=latest_event_ids, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( event_handler.handle_new_client_event( self.requester, events_and_context=[(event, context)] @@ -544,7 +545,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): assert state_ids1 is not None state1 = set(state_ids1.values()) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_handler.create_event( self.requester, { @@ -557,6 +558,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): prev_event_ids=latest_event_ids, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( event_handler.handle_new_client_event( self.requester, events_and_context=[(event, context)] diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index e82c03f597..62aed6af0a 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -496,3 +496,129 @@ class StateStoreTestCase(HomeserverTestCase): self.assertEqual(is_all, True) self.assertDictEqual({(e5.type, e5.state_key): e5.event_id}, state_dict) + + def test_batched_state_group_storing(self) -> None: + creation_event = self.inject_state_event( + self.room, self.u_alice, EventTypes.Create, "", {} + ) + state_to_event = self.get_success( + self.storage.state.get_state_groups( + self.room.to_string(), [creation_event.event_id] + ) + ) + current_state_group = list(state_to_event.keys())[0] + + # create some unpersisted events and event contexts to store against room + events_and_context = [] + builder = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.Name, + "sender": self.u_alice.to_string(), + "state_key": "", + "room_id": self.room.to_string(), + "content": {"name": "first rename of room"}, + }, + ) + + event1, unpersisted_context1 = self.get_success( + self.event_creation_handler.create_new_client_event(builder) + ) + events_and_context.append((event1, unpersisted_context1)) + + builder2 = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.JoinRules, + "sender": self.u_alice.to_string(), + "state_key": "", + "room_id": self.room.to_string(), + "content": {"join_rule": "private"}, + }, + ) + + event2, unpersisted_context2 = self.get_success( + self.event_creation_handler.create_new_client_event(builder2) + ) + events_and_context.append((event2, unpersisted_context2)) + + builder3 = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.Message, + "sender": self.u_alice.to_string(), + "room_id": self.room.to_string(), + "content": {"body": "hello from event 3", "msgtype": "m.text"}, + }, + ) + + event3, unpersisted_context3 = self.get_success( + self.event_creation_handler.create_new_client_event(builder3) + ) + events_and_context.append((event3, unpersisted_context3)) + + builder4 = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.JoinRules, + "sender": self.u_alice.to_string(), + "state_key": "", + "room_id": self.room.to_string(), + "content": {"join_rule": "public"}, + }, + ) + + event4, unpersisted_context4 = self.get_success( + self.event_creation_handler.create_new_client_event(builder4) + ) + events_and_context.append((event4, unpersisted_context4)) + + processed_events_and_context = self.get_success( + self.hs.get_datastores().state.store_state_deltas_for_batched( + events_and_context, self.room.to_string(), current_state_group + ) + ) + + # check that only state events are in state_groups, and all state events are in state_groups + res = self.get_success( + self.store.db_pool.simple_select_list( + table="state_groups", + keyvalues=None, + retcols=("event_id",), + ) + ) + + events = [] + for result in res: + self.assertNotIn(event3.event_id, result) + events.append(result.get("event_id")) + + for event, _ in processed_events_and_context: + if event.is_state(): + self.assertIn(event.event_id, events) + + # check that each unique state has state group in state_groups_state and that the + # type/state key is correct, and check that each state event's state group + # has an entry and prev event in state_group_edges + for event, context in processed_events_and_context: + if event.is_state(): + state = self.get_success( + self.store.db_pool.simple_select_list( + table="state_groups_state", + keyvalues={"state_group": context.state_group_after_event}, + retcols=("type", "state_key"), + ) + ) + self.assertEqual(event.type, state[0].get("type")) + self.assertEqual(event.state_key, state[0].get("state_key")) + + groups = self.get_success( + self.store.db_pool.simple_select_list( + table="state_group_edges", + keyvalues={"state_group": str(context.state_group_after_event)}, + retcols=("*",), + ) + ) + self.assertEqual( + context.state_group_before_event, groups[0].get("prev_state_group") + ) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 2d169684cf..43b724c4dd 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -504,6 +504,139 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, ) + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_ascii_case_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name in a + different case. + """ + CHARLIE = "@someuser:example.org" + self.get_success( + self.store.update_profile_in_user_dir(CHARLIE, "Charlie", None) + ) + + r = self.get_success(self.store.search_user_dir(ALICE, "cHARLIE", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": CHARLIE, "display_name": "Charlie", "avatar_url": None}, + ) + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_unicode_case_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name in a + different case. + """ + IVAN = "@someuser:example.org" + self.get_success(self.store.update_profile_in_user_dir(IVAN, "Иван", None)) + + r = self.get_success(self.store.search_user_dir(ALICE, "иВАН", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": IVAN, "display_name": "Иван", "avatar_url": None}, + ) + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_dotted_dotless_i_case_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name in a + different case, when their name contains dotted or dotless "i"s. + + Some languages have dotted and dotless versions of "i", which are considered to + be different letters: i <-> İ, ı <-> I. To make things difficult, they reuse the + ASCII "i" and "I" code points, despite having different lowercase / uppercase + forms. + """ + USER = "@someuser:example.org" + + expected_matches = [ + # (search_term, display_name) + # A search for "i" should match "İ". + ("iiiii", "İİİİİ"), + # A search for "I" should match "ı". + ("IIIII", "ııııı"), + # A search for "ı" should match "I". + ("ııııı", "IIIII"), + # A search for "İ" should match "i". + ("İİİİİ", "iiiii"), + ] + + for search_term, display_name in expected_matches: + self.get_success( + self.store.update_profile_in_user_dir(USER, display_name, None) + ) + + r = self.get_success(self.store.search_user_dir(ALICE, search_term, 10)) + self.assertFalse(r["limited"]) + self.assertEqual( + 1, + len(r["results"]), + f"searching for {search_term!r} did not match {display_name!r}", + ) + self.assertDictEqual( + r["results"][0], + {"user_id": USER, "display_name": display_name, "avatar_url": None}, + ) + + # We don't test for negative matches, to allow implementations that consider all + # the i variants to be the same. + + test_search_user_dir_dotted_dotless_i_case_insensitivity.skip = "not supported" # type: ignore + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_unicode_normalization(self) -> None: + """Tests that a user can look up another user by searching for their name with + either composed or decomposed accents. + """ + AMELIE = "@someuser:example.org" + + expected_matches = [ + # (search_term, display_name) + ("Ame\u0301lie", "Amélie"), + ("Amélie", "Ame\u0301lie"), + ] + + for search_term, display_name in expected_matches: + self.get_success( + self.store.update_profile_in_user_dir(AMELIE, display_name, None) + ) + + r = self.get_success(self.store.search_user_dir(ALICE, search_term, 10)) + self.assertFalse(r["limited"]) + self.assertEqual( + 1, + len(r["results"]), + f"searching for {search_term!r} did not match {display_name!r}", + ) + self.assertDictEqual( + r["results"][0], + {"user_id": AMELIE, "display_name": display_name, "avatar_url": None}, + ) + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_accent_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name + without any accents. + """ + AMELIE = "@someuser:example.org" + self.get_success(self.store.update_profile_in_user_dir(AMELIE, "Amélie", None)) + + r = self.get_success(self.store.search_user_dir(ALICE, "amelie", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": AMELIE, "display_name": "Amélie", "avatar_url": None}, + ) + + # It may be desirable for "é"s in search terms to not match plain "e"s and we + # really don't want "é"s in search terms to match "e"s with different accents. + # But we don't test for this to allow implementations that consider all + # "e"-lookalikes to be the same. + + test_search_user_dir_accent_insensitivity.skip = "not supported yet" # type: ignore + class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase): use_icu = True diff --git a/tests/unittest.py b/tests/unittest.py index b21e7f1221..f9160faa1d 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -723,7 +723,7 @@ class HomeserverTestCase(TestCase): event_creator = self.hs.get_event_creation_handler() requester = create_requester(user) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_creator.create_event( requester, { @@ -735,7 +735,7 @@ class HomeserverTestCase(TestCase): prev_event_ids=prev_event_ids, ) ) - + context = self.get_success(unpersisted_context.persist(event)) if soft_failed: event.internal_metadata.soft_failed = True |