diff options
127 files changed, 3516 insertions, 1748 deletions
diff --git a/.ci/scripts/test_export_data_command.sh b/.ci/scripts/test_export_data_command.sh new file mode 100755 index 0000000000..75f5811d10 --- /dev/null +++ b/.ci/scripts/test_export_data_command.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +# Test for the export-data admin command against sqlite and postgres + +set -xe +cd `dirname $0`/../.. + +echo "--- Install dependencies" + +# Install dependencies for this test. +pip install psycopg2 + +# Install Synapse itself. This won't update any libraries. +pip install -e . + +echo "--- Generate the signing key" + +# Generate the server's signing key. +python -m synapse.app.homeserver --generate-keys -c .ci/sqlite-config.yaml + +echo "--- Prepare test database" + +# Make sure the SQLite3 database is using the latest schema and has no pending background update. +scripts/update_synapse_database --database-config .ci/sqlite-config.yaml --run-background-updates + +# Run the export-data command on the sqlite test database +python -m synapse.app.admin_cmd -c .ci/sqlite-config.yaml export-data @anon-20191002_181700-832:localhost:8800 \ +--output-directory /tmp/export_data + +# Test that the output directory exists and contains the rooms directory +dir="/tmp/export_data/rooms" +if [ -d "$dir" ]; then + echo "Command successful, this test passes" +else + echo "No output directories found, the command fails against a sqlite database." + exit 1 +fi + +# Create the PostgreSQL database. +.ci/scripts/postgres_exec.py "CREATE DATABASE synapse" + +# Port the SQLite databse to postgres so we can check command works against postgres +echo "+++ Port SQLite3 databse to postgres" +scripts/synapse_port_db --sqlite-database .ci/test_db.db --postgres-config .ci/postgres-config.yaml + +# Run the export-data command on postgres database +python -m synapse.app.admin_cmd -c .ci/postgres-config.yaml export-data @anon-20191002_181700-832:localhost:8800 \ +--output-directory /tmp/export_data2 + +# Test that the output directory exists and contains the rooms directory +dir2="/tmp/export_data2/rooms" +if [ -d "$dir2" ]; then + echo "Command successful, this test passes" +else + echo "No output directories found, the command fails against a postgres database." + exit 1 +fi diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 30a911fdbd..8d7e8cafd9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -122,6 +122,8 @@ jobs: SYNAPSE_POSTGRES_USER: postgres SYNAPSE_POSTGRES_PASSWORD: postgres - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair @@ -146,6 +148,8 @@ jobs: env: TRIAL_FLAGS: "--jobs=2" - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair @@ -176,6 +180,8 @@ jobs: env: TRIAL_FLAGS: "--jobs=2" - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair @@ -247,6 +253,35 @@ jobs: /logs/results.tap /logs/**/*.log* + export-data: + if: ${{ !failure() && !cancelled() }} # Allow previous steps to be skipped, but not fail + needs: [linting-done, portdb] + runs-on: ubuntu-latest + env: + TOP: ${{ github.workspace }} + + services: + postgres: + image: postgres + ports: + - 5432:5432 + env: + POSTGRES_PASSWORD: "postgres" + POSTGRES_INITDB_ARGS: "--lc-collate C --lc-ctype C --encoding UTF8" + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - uses: actions/checkout@v2 + - run: sudo apt-get -qq install xmlsec1 + - uses: actions/setup-python@v2 + with: + python-version: "3.9" + - run: .ci/scripts/test_export_data_command.sh + portdb: if: ${{ !failure() && !cancelled() }} # Allow previous steps to be skipped, but not fail needs: linting-done diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index b5c729888f..e974ac7aba 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -33,6 +33,8 @@ jobs: TRIAL_FLAGS: "--jobs=2" - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair diff --git a/MANIFEST.in b/MANIFEST.in index 44d5cc7618..c24786c3b3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,7 @@ include demo/demo.tls.dh include demo/*.py include demo/*.sh +include synapse/py.typed recursive-include synapse/storage *.sql recursive-include synapse/storage *.sql.postgres recursive-include synapse/storage *.sql.sqlite diff --git a/changelog.d/10548.feature b/changelog.d/10548.feature new file mode 100644 index 0000000000..263a811faf --- /dev/null +++ b/changelog.d/10548.feature @@ -0,0 +1 @@ +Port the Password Auth Providers module interface to the new generic interface. \ No newline at end of file diff --git a/changelog.d/10972.misc b/changelog.d/10972.misc new file mode 100644 index 0000000000..f66a7beaf0 --- /dev/null +++ b/changelog.d/10972.misc @@ -0,0 +1 @@ +Add type hints to `synapse.storage.databases.main.client_ips`. diff --git a/changelog.d/10975.feature b/changelog.d/10975.feature new file mode 100644 index 0000000000..167426e1fc --- /dev/null +++ b/changelog.d/10975.feature @@ -0,0 +1 @@ +Resolve and share `state_groups` for all [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical events in batch. diff --git a/changelog.d/10984.misc b/changelog.d/10984.misc new file mode 100644 index 0000000000..86c4081cc4 --- /dev/null +++ b/changelog.d/10984.misc @@ -0,0 +1 @@ +Fix spurious warnings about losing the logging context on the `ReplicationCommandHandler` when losing the replication connection. diff --git a/changelog.d/11001.bugfix b/changelog.d/11001.bugfix new file mode 100644 index 0000000000..f51ffb3481 --- /dev/null +++ b/changelog.d/11001.bugfix @@ -0,0 +1 @@ + Fix a long-standing bug which meant that events received over federation were sometimes incorrectly accepted into the room state. diff --git a/changelog.d/11008.misc b/changelog.d/11008.misc new file mode 100644 index 0000000000..a67d95d66f --- /dev/null +++ b/changelog.d/11008.misc @@ -0,0 +1 @@ +Include rejected status when we log events. diff --git a/changelog.d/11009.bugfix b/changelog.d/11009.bugfix new file mode 100644 index 0000000000..13b8e5983b --- /dev/null +++ b/changelog.d/11009.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which meant that events received over federation were sometimes incorrectly accepted into the room state. diff --git a/changelog.d/11014.misc b/changelog.d/11014.misc new file mode 100644 index 0000000000..4b99ea354f --- /dev/null +++ b/changelog.d/11014.misc @@ -0,0 +1 @@ +Add some extra logging to the event persistence code. diff --git a/changelog.d/11024.misc b/changelog.d/11024.misc new file mode 100644 index 0000000000..51ad800d4d --- /dev/null +++ b/changelog.d/11024.misc @@ -0,0 +1 @@ +Add support for Ubuntu 21.10 "Impish Indri". \ No newline at end of file diff --git a/changelog.d/11027.bugfix b/changelog.d/11027.bugfix new file mode 100644 index 0000000000..ae6cc44470 --- /dev/null +++ b/changelog.d/11027.bugfix @@ -0,0 +1 @@ +Fix 500 error on `/messages` when the server accumulates more than 5 backwards extremities at a given depth for a room. diff --git a/changelog.d/11035.misc b/changelog.d/11035.misc new file mode 100644 index 0000000000..6b45b7e9bd --- /dev/null +++ b/changelog.d/11035.misc @@ -0,0 +1 @@ +Rearrange the internal workings of the incremental user directory updates. \ No newline at end of file diff --git a/changelog.d/11048.misc b/changelog.d/11048.misc new file mode 100644 index 0000000000..22d3c956f5 --- /dev/null +++ b/changelog.d/11048.misc @@ -0,0 +1 @@ +Simplify the user admin API tests. \ No newline at end of file diff --git a/changelog.d/11054.misc b/changelog.d/11054.misc new file mode 100644 index 0000000000..1103368fec --- /dev/null +++ b/changelog.d/11054.misc @@ -0,0 +1 @@ +Mark the Synapse package as containing type annotations and fix export declarations so that Synapse pluggable modules may be type checked against Synapse. diff --git a/changelog.d/11055.misc b/changelog.d/11055.misc new file mode 100644 index 0000000000..27688c3214 --- /dev/null +++ b/changelog.d/11055.misc @@ -0,0 +1 @@ +Improve type hints for `_wrap_in_base_path` decorator used by `MediaFilePaths`. diff --git a/changelog.d/11056.misc b/changelog.d/11056.misc new file mode 100644 index 0000000000..dd701ed177 --- /dev/null +++ b/changelog.d/11056.misc @@ -0,0 +1 @@ +Remove dead code from `MediaFilePaths`. diff --git a/changelog.d/11057.misc b/changelog.d/11057.misc new file mode 100644 index 0000000000..4d412d3e9b --- /dev/null +++ b/changelog.d/11057.misc @@ -0,0 +1 @@ +Add tests for `MediaFilePaths` class. diff --git a/changelog.d/11065.misc b/changelog.d/11065.misc new file mode 100644 index 0000000000..c6f37fc52b --- /dev/null +++ b/changelog.d/11065.misc @@ -0,0 +1 @@ +Be more lenient when parsing oEmbed response versions. diff --git a/changelog.d/11066.misc b/changelog.d/11066.misc new file mode 100644 index 0000000000..1e337bee54 --- /dev/null +++ b/changelog.d/11066.misc @@ -0,0 +1 @@ +Add type hints to `synapse.events`. diff --git a/changelog.d/11068.misc b/changelog.d/11068.misc new file mode 100644 index 0000000000..1fe69aecde --- /dev/null +++ b/changelog.d/11068.misc @@ -0,0 +1 @@ +Always dump logs from unit tests during CI runs. diff --git a/changelog.d/11069.doc b/changelog.d/11069.doc new file mode 100644 index 0000000000..dae4ae1777 --- /dev/null +++ b/changelog.d/11069.doc @@ -0,0 +1 @@ +Fix broken links relating to module API deprecation in the upgrade notes. diff --git a/changelog.d/11070.misc b/changelog.d/11070.misc new file mode 100644 index 0000000000..52b23f9671 --- /dev/null +++ b/changelog.d/11070.misc @@ -0,0 +1 @@ +Create a separate module for the retention configuration. diff --git a/changelog.d/11071.misc b/changelog.d/11071.misc new file mode 100644 index 0000000000..33a11abdd5 --- /dev/null +++ b/changelog.d/11071.misc @@ -0,0 +1 @@ +Add a test for the workaround introduced in [\#11042](https://github.com/matrix-org/synapse/pull/11042) concerning the behaviour of third-party rule modules and `SynapseError`s. diff --git a/changelog.d/11075.bugfix b/changelog.d/11075.bugfix new file mode 100644 index 0000000000..9b24971c5a --- /dev/null +++ b/changelog.d/11075.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where users excluded from the user directory were added into the directory if they belonged to a room which became public or private. \ No newline at end of file diff --git a/changelog.d/11076.misc b/changelog.d/11076.misc new file mode 100644 index 0000000000..c581a86e47 --- /dev/null +++ b/changelog.d/11076.misc @@ -0,0 +1 @@ +Fix type hints in the relations tests. diff --git a/changelog.d/11077.bugfix b/changelog.d/11077.bugfix new file mode 100644 index 0000000000..dc35c86440 --- /dev/null +++ b/changelog.d/11077.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug when attempting to preview URLs which are in the `windows-1252` character encoding. diff --git a/changelog.d/11078.bugfix b/changelog.d/11078.bugfix new file mode 100644 index 0000000000..cc813babe4 --- /dev/null +++ b/changelog.d/11078.bugfix @@ -0,0 +1 @@ +Fix broken export-data admin command and add test script checking the command to CI. \ No newline at end of file diff --git a/changelog.d/11089.bugfix b/changelog.d/11089.bugfix new file mode 100644 index 0000000000..dc35c86440 --- /dev/null +++ b/changelog.d/11089.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug when attempting to preview URLs which are in the `windows-1252` character encoding. diff --git a/changelog.d/11092.doc b/changelog.d/11092.doc new file mode 100644 index 0000000000..916c2b3476 --- /dev/null +++ b/changelog.d/11092.doc @@ -0,0 +1 @@ +Clarify the the sample log config can be copied from the documentation without issue. diff --git a/changelog.d/11093.doc b/changelog.d/11093.doc new file mode 100644 index 0000000000..70fca0bdce --- /dev/null +++ b/changelog.d/11093.doc @@ -0,0 +1 @@ +Update the admin API documentation with an updated list of the characters allowed in registration tokens. diff --git a/changelog.d/11096.doc b/changelog.d/11096.doc new file mode 100644 index 0000000000..d8e7424289 --- /dev/null +++ b/changelog.d/11096.doc @@ -0,0 +1 @@ +Document Synapse's behaviour when dealing with multiple modules registering the same callbacks and/or handlers for the same HTTP endpoints. diff --git a/changelog.d/11103.bugfix b/changelog.d/11103.bugfix new file mode 100644 index 0000000000..3498f04a45 --- /dev/null +++ b/changelog.d/11103.bugfix @@ -0,0 +1 @@ +Fix local users who left all their rooms being removed from the user directory, even if the "search_all_users" config option was enabled. \ No newline at end of file diff --git a/changelog.d/11109.misc b/changelog.d/11109.misc new file mode 100644 index 0000000000..d83936ccc4 --- /dev/null +++ b/changelog.d/11109.misc @@ -0,0 +1 @@ +Add missing type hints to `synapse.api` module. diff --git a/changelog.d/11115.misc b/changelog.d/11115.misc new file mode 100644 index 0000000000..9a765435db --- /dev/null +++ b/changelog.d/11115.misc @@ -0,0 +1 @@ +Clean up some of the federation event authentication code for clarity. diff --git a/changelog.d/11116.misc b/changelog.d/11116.misc new file mode 100644 index 0000000000..9a765435db --- /dev/null +++ b/changelog.d/11116.misc @@ -0,0 +1 @@ +Clean up some of the federation event authentication code for clarity. diff --git a/changelog.d/11118.doc b/changelog.d/11118.doc new file mode 100644 index 0000000000..3c2187f3b1 --- /dev/null +++ b/changelog.d/11118.doc @@ -0,0 +1 @@ +Fix instances of `[example]{.title-ref}` in the upgrade documentation as a result of prior RST to Markdown conversion. diff --git a/changelog.d/11121.misc b/changelog.d/11121.misc new file mode 100644 index 0000000000..916beeaacb --- /dev/null +++ b/changelog.d/11121.misc @@ -0,0 +1 @@ +Add type hints for event fetching. diff --git a/changelog.d/11132.doc b/changelog.d/11132.doc new file mode 100644 index 0000000000..4f38be5b27 --- /dev/null +++ b/changelog.d/11132.doc @@ -0,0 +1 @@ +Document the version of Synapse each module callback was introduced in. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index bdb44543b8..35412ea92c 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -43,6 +43,7 @@ - [Third-party rules callbacks](modules/third_party_rules_callbacks.md) - [Presence router callbacks](modules/presence_router_callbacks.md) - [Account validity callbacks](modules/account_validity_callbacks.md) + - [Password auth provider callbacks](modules/password_auth_provider_callbacks.md) - [Porting a legacy module to the new interface](modules/porting_legacy_module.md) - [Workers](workers.md) - [Using `synctl` with Workers](synctl_workers.md) diff --git a/docs/modules/account_validity_callbacks.md b/docs/modules/account_validity_callbacks.md index 80684b7828..3cd0e72198 100644 --- a/docs/modules/account_validity_callbacks.md +++ b/docs/modules/account_validity_callbacks.md @@ -9,6 +9,8 @@ The available account validity callbacks are: ### `is_user_expired` +_First introduced in Synapse v1.39.0_ + ```python async def is_user_expired(user: str) -> Optional[bool] ``` @@ -22,8 +24,15 @@ If the module returns `True`, the current request will be denied with the error `ORG_MATRIX_EXPIRED_ACCOUNT` and the HTTP status code 403. Note that this doesn't invalidate the user's access token. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `None`, Synapse falls through to the next one. The value of the first +callback that does not return `None` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `on_user_registration` +_First introduced in Synapse v1.39.0_ + ```python async def on_user_registration(user: str) -> None ``` @@ -31,3 +40,5 @@ async def on_user_registration(user: str) -> None Called after successfully registering a user, in case the module needs to perform extra operations to keep track of them. (e.g. add them to a database table). The user is represented by their Matrix user ID. + +If multiple modules implement this callback, Synapse runs them all in order. diff --git a/docs/modules/index.md b/docs/modules/index.md index 3fda8cb7f0..0a868b309f 100644 --- a/docs/modules/index.md +++ b/docs/modules/index.md @@ -2,6 +2,11 @@ Synapse supports extending its functionality by configuring external modules. +**Note**: When using third-party modules, you effectively allow someone else to run +custom code on your Synapse homeserver. Server admins are encouraged to verify the +provenance of the modules they use on their homeserver and make sure the modules aren't +running malicious code on their instance. + ## Using modules To use a module on Synapse, add it to the `modules` section of the configuration file: @@ -18,17 +23,31 @@ modules: Each module is defined by a path to a Python class as well as a configuration. This information for a given module should be available in the module's own documentation. -**Note**: When using third-party modules, you effectively allow someone else to run -custom code on your Synapse homeserver. Server admins are encouraged to verify the -provenance of the modules they use on their homeserver and make sure the modules aren't -running malicious code on their instance. +## Using multiple modules + +The order in which modules are listed in this section is important. When processing an +action that can be handled by several modules, Synapse will always prioritise the module +that appears first (i.e. is the highest in the list). This means: + +* If several modules register the same callback, the callback registered by the module + that appears first is used. +* If several modules try to register a handler for the same HTTP path, only the handler + registered by the module that appears first is used. Handlers registered by the other + module(s) are ignored and Synapse will log a warning message about them. + +Note that Synapse doesn't allow multiple modules implementing authentication checkers via +the password auth provider feature for the same login type with different fields. If this +happens, Synapse will refuse to start. + +## Current status -Also note that we are currently in the process of migrating module interfaces to this -system. While some interfaces might be compatible with it, others still require -configuring modules in another part of Synapse's configuration file. +We are currently in the process of migrating module interfaces to this system. While some +interfaces might be compatible with it, others still require configuring modules in +another part of Synapse's configuration file. Currently, only the following pre-existing interfaces are compatible with this new system: * spam checker * third-party rules * presence router +* password auth providers diff --git a/docs/modules/password_auth_provider_callbacks.md b/docs/modules/password_auth_provider_callbacks.md new file mode 100644 index 0000000000..9dddfdfaaa --- /dev/null +++ b/docs/modules/password_auth_provider_callbacks.md @@ -0,0 +1,176 @@ +# Password auth provider callbacks + +Password auth providers offer a way for server administrators to integrate +their Synapse installation with an external authentication system. The callbacks can be +registered by using the Module API's `register_password_auth_provider_callbacks` method. + +## Callbacks + +### `auth_checkers` + +_First introduced in Synapse v1.46.0_ + +``` + auth_checkers: Dict[Tuple[str,Tuple], Callable] +``` + +A dict mapping from tuples of a login type identifier (such as `m.login.password`) and a +tuple of field names (such as `("password", "secret_thing")`) to authentication checking +callbacks, which should be of the following form: + +```python +async def check_auth( + user: str, + login_type: str, + login_dict: "synapse.module_api.JsonDict", +) -> Optional[ + Tuple[ + str, + Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]] + ] +] +``` + +The login type and field names should be provided by the user in the +request to the `/login` API. [The Matrix specification](https://matrix.org/docs/spec/client_server/latest#authentication-types) +defines some types, however user defined ones are also allowed. + +The callback is passed the `user` field provided by the client (which might not be in +`@username:server` form), the login type, and a dictionary of login secrets passed by +the client. + +If the authentication is successful, the module must return the user's Matrix ID (e.g. +`@alice:example.com`) and optionally a callback to be called with the response to the +`/login` request. If the module doesn't wish to return a callback, it must return `None` +instead. + +If the authentication is unsuccessful, the module must return `None`. + +If multiple modules register an auth checker for the same login type but with different +fields, Synapse will refuse to start. + +If multiple modules register an auth checker for the same login type with the same fields, +then the callbacks will be executed in order, until one returns a Matrix User ID (and +optionally a callback). In that case, the return value of that callback will be accepted +and subsequent callbacks will not be fired. If every callback returns `None`, then the +authentication fails. + +### `check_3pid_auth` + +_First introduced in Synapse v1.46.0_ + +```python +async def check_3pid_auth( + medium: str, + address: str, + password: str, +) -> Optional[ + Tuple[ + str, + Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]] + ] +] +``` + +Called when a user attempts to register or log in with a third party identifier, +such as email. It is passed the medium (eg. `email`), an address (eg. `jdoe@example.com`) +and the user's password. + +If the authentication is successful, the module must return the user's Matrix ID (e.g. +`@alice:example.com`) and optionally a callback to be called with the response to the `/login` request. +If the module doesn't wish to return a callback, it must return None instead. + +If the authentication is unsuccessful, the module must return `None`. + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `None`, Synapse falls through to the next one. The value of the first +callback that does not return `None` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. If every callback return `None`, +the authentication is denied. + +### `on_logged_out` + +_First introduced in Synapse v1.46.0_ + +```python +async def on_logged_out( + user_id: str, + device_id: Optional[str], + access_token: str +) -> None +``` +Called during a logout request for a user. It is passed the qualified user ID, the ID of the +deactivated device (if any: access tokens are occasionally created without an associated +device ID), and the (now deactivated) access token. + +If multiple modules implement this callback, Synapse runs them all in order. + +## Example + +The example module below implements authentication checkers for two different login types: +- `my.login.type` + - Expects a `my_field` field to be sent to `/login` + - Is checked by the method: `self.check_my_login` +- `m.login.password` (defined in [the spec](https://matrix.org/docs/spec/client_server/latest#password-based)) + - Expects a `password` field to be sent to `/login` + - Is checked by the method: `self.check_pass` + + +```python +from typing import Awaitable, Callable, Optional, Tuple + +import synapse +from synapse import module_api + + +class MyAuthProvider: + def __init__(self, config: dict, api: module_api): + + self.api = api + + self.credentials = { + "bob": "building", + "@scoop:matrix.org": "digging", + } + + api.register_password_auth_provider_callbacks( + auth_checkers={ + ("my.login_type", ("my_field",)): self.check_my_login, + ("m.login.password", ("password",)): self.check_pass, + }, + ) + + async def check_my_login( + self, + username: str, + login_type: str, + login_dict: "synapse.module_api.JsonDict", + ) -> Optional[ + Tuple[ + str, + Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]], + ] + ]: + if login_type != "my.login_type": + return None + + if self.credentials.get(username) == login_dict.get("my_field"): + return self.api.get_qualified_user_id(username) + + async def check_pass( + self, + username: str, + login_type: str, + login_dict: "synapse.module_api.JsonDict", + ) -> Optional[ + Tuple[ + str, + Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]], + ] + ]: + if login_type != "m.login.password": + return None + + if self.credentials.get(username) == login_dict.get("password"): + return self.api.get_qualified_user_id(username) +``` diff --git a/docs/modules/porting_legacy_module.md b/docs/modules/porting_legacy_module.md index a7a251e535..89084eb7b3 100644 --- a/docs/modules/porting_legacy_module.md +++ b/docs/modules/porting_legacy_module.md @@ -12,6 +12,9 @@ should register this resource in its `__init__` method using the `register_web_r method from the `ModuleApi` class (see [this section](writing_a_module.html#registering-a-web-resource) for more info). +There is no longer a `get_db_schema_files` callback provided for password auth provider modules. Any +changes to the database should now be made by the module using the module API class. + The module's author should also update any example in the module's configuration to only use the new `modules` section in Synapse's configuration file (see [this section](index.html#using-modules) for more info). diff --git a/docs/modules/presence_router_callbacks.md b/docs/modules/presence_router_callbacks.md index 4abcc9af47..d3da25cef4 100644 --- a/docs/modules/presence_router_callbacks.md +++ b/docs/modules/presence_router_callbacks.md @@ -10,6 +10,8 @@ The available presence router callbacks are: ### `get_users_for_states` +_First introduced in Synapse v1.42.0_ + ```python async def get_users_for_states( state_updates: Iterable["synapse.api.UserPresenceState"], @@ -24,8 +26,14 @@ must return a dictionary that maps from Matrix user IDs (which can be local or r Synapse will then attempt to send the specified presence updates to each user when possible. +If multiple modules implement this callback, Synapse merges all the dictionaries returned +by the callbacks. If multiple callbacks return a dictionary containing the same key, +Synapse concatenates the sets associated with this key from each dictionary. + ### `get_interested_users` +_First introduced in Synapse v1.42.0_ + ```python async def get_interested_users( user_id: str @@ -44,6 +52,12 @@ query. The returned users can be local or remote. Alternatively the callback can return `synapse.module_api.PRESENCE_ALL_USERS` to indicate that the user should receive updates from all known users. +If multiple modules implement this callback, they will be considered in order. Synapse +calls each callback one by one, and use a concatenation of all the `set`s returned by the +callbacks. If one callback returns `synapse.module_api.PRESENCE_ALL_USERS`, Synapse uses +this value instead. If this happens, Synapse does not call any of the subsequent +implementations of this callback. + ## Example The example below is a module that implements both presence router callbacks, and ensures diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 787e99074a..534ea196e0 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -10,6 +10,8 @@ The available spam checker callbacks are: ### `check_event_for_spam` +_First introduced in Synapse v1.37.0_ + ```python async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] ``` @@ -19,8 +21,15 @@ either a `bool` to indicate whether the event must be rejected because of spam, to indicate the event must be rejected because of spam and to give a rejection reason to forward to clients. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_join_room` +_First introduced in Synapse v1.37.0_ + ```python async def user_may_join_room(user: str, room: str, is_invited: bool) -> bool ``` @@ -34,8 +43,15 @@ currently has a pending invite in the room. This callback isn't called if the join is performed by a server administrator, or in the context of a room creation. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_invite` +_First introduced in Synapse v1.37.0_ + ```python async def user_may_invite(inviter: str, invitee: str, room_id: str) -> bool ``` @@ -44,8 +60,15 @@ Called when processing an invitation. The module must return a `bool` indicating the inviter can invite the invitee to the given room. Both inviter and invitee are represented by their Matrix user ID (e.g. `@alice:example.com`). +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_send_3pid_invite` +_First introduced in Synapse v1.45.0_ + ```python async def user_may_send_3pid_invite( inviter: str, @@ -79,8 +102,15 @@ await user_may_send_3pid_invite( **Note**: If the third-party identifier is already associated with a matrix user ID, [`user_may_invite`](#user_may_invite) will be used instead. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_create_room` +_First introduced in Synapse v1.37.0_ + ```python async def user_may_create_room(user: str) -> bool ``` @@ -88,8 +118,15 @@ async def user_may_create_room(user: str) -> bool Called when processing a room creation request. The module must return a `bool` indicating whether the given user (represented by their Matrix user ID) is allowed to create a room. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_create_room_with_invites` +_First introduced in Synapse v1.44.0_ + ```python async def user_may_create_room_with_invites( user: str, @@ -117,8 +154,15 @@ corresponding list(s) will be empty. since no invites are sent when cloning a room. To cover this case, modules also need to implement `user_may_create_room`. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_create_room_alias` +_First introduced in Synapse v1.37.0_ + ```python async def user_may_create_room_alias(user: str, room_alias: "synapse.types.RoomAlias") -> bool ``` @@ -127,8 +171,15 @@ Called when trying to associate an alias with an existing room. The module must `bool` indicating whether the given user (represented by their Matrix user ID) is allowed to set the given alias. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `user_may_publish_room` +_First introduced in Synapse v1.37.0_ + ```python async def user_may_publish_room(user: str, room_id: str) -> bool ``` @@ -137,8 +188,15 @@ Called when trying to publish a room to the homeserver's public rooms directory. module must return a `bool` indicating whether the given user (represented by their Matrix user ID) is allowed to publish the given room. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `check_username_for_spam` +_First introduced in Synapse v1.37.0_ + ```python async def check_username_for_spam(user_profile: Dict[str, str]) -> bool ``` @@ -154,8 +212,15 @@ is represented as a dictionary with the following keys: The module is given a copy of the original dictionary, so modifying it from within the module cannot modify a user's profile when included in user directory search results. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `check_registration_for_spam` +_First introduced in Synapse v1.37.0_ + ```python async def check_registration_for_spam( email_threepid: Optional[dict], @@ -179,8 +244,16 @@ The arguments passed to this callback are: used during the registration process. * `auth_provider_id`: The identifier of the SSO authentication provider, if any. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `RegistrationBehaviour.ALLOW`, Synapse falls through to the next one. +The value of the first callback that does not return `RegistrationBehaviour.ALLOW` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + ### `check_media_file_for_spam` +_First introduced in Synapse v1.37.0_ + ```python async def check_media_file_for_spam( file_wrapper: "synapse.rest.media.v1.media_storage.ReadableFileWrapper", @@ -191,6 +264,11 @@ async def check_media_file_for_spam( Called when storing a local or remote file. The module must return a boolean indicating whether the given file can be stored in the homeserver's media store. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ## Example The example below is a module that implements the spam checker callback diff --git a/docs/modules/third_party_rules_callbacks.md b/docs/modules/third_party_rules_callbacks.md index 2ba6f39453..034923da0f 100644 --- a/docs/modules/third_party_rules_callbacks.md +++ b/docs/modules/third_party_rules_callbacks.md @@ -10,6 +10,8 @@ The available third party rules callbacks are: ### `check_event_allowed` +_First introduced in Synapse v1.39.0_ + ```python async def check_event_allowed( event: "synapse.events.EventBase", @@ -44,8 +46,15 @@ dictionary, and modify the returned dictionary accordingly. Note that replacing the event only works for events sent by local users, not for events received over federation. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `on_create_room` +_First introduced in Synapse v1.39.0_ + ```python async def on_create_room( requester: "synapse.types.Requester", @@ -63,8 +72,16 @@ the request is a server admin. Modules can modify the `request_content` (by e.g. adding events to its `initial_state`), or deny the room's creation by raising a `module_api.errors.SynapseError`. +If multiple modules implement this callback, they will be considered in order. If a +callback returns without raising an exception, Synapse falls through to the next one. The +room creation will be forbidden as soon as one of the callbacks raises an exception. If +this happens, Synapse will not call any of the subsequent implementations of this +callback. + ### `check_threepid_can_be_invited` +_First introduced in Synapse v1.39.0_ + ```python async def check_threepid_can_be_invited( medium: str, @@ -76,8 +93,15 @@ async def check_threepid_can_be_invited( Called when processing an invite via a third-party identifier (i.e. email or phone number). The module must return a boolean indicating whether the invite can go through. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ### `check_visibility_can_be_modified` +_First introduced in Synapse v1.39.0_ + ```python async def check_visibility_can_be_modified( room_id: str, @@ -90,6 +114,11 @@ Called when changing the visibility of a room in the local public room directory visibility is a string that's either "public" or "private". The module must return a boolean indicating whether the change can go through. +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ## Example The example below is a module that implements the third-party rules callback diff --git a/docs/modules/writing_a_module.md b/docs/modules/writing_a_module.md index 4f2fec8dc9..7764e06692 100644 --- a/docs/modules/writing_a_module.md +++ b/docs/modules/writing_a_module.md @@ -12,6 +12,21 @@ configuration associated with the module in Synapse's configuration file. See the documentation for the `ModuleApi` class [here](https://github.com/matrix-org/synapse/blob/master/synapse/module_api/__init__.py). +## When Synapse runs with several modules configured + +If Synapse is running with other modules configured, the order each module appears in +within the `modules` section of the Synapse configuration file might restrict what it can +or cannot register. See [this section](index.html#using-multiple-modules) for more +information. + +On top of the rules listed in the link above, if a callback returns a value that should +cause the current operation to fail (e.g. if a callback checking an event returns with a +value that should cause the event to be denied), Synapse will fail the operation and +ignore any subsequent callbacks that should have been run after this one. + +The documentation for each callback mentions how Synapse behaves when +multiple modules implement it. + ## Handling the module's configuration A module can implement the following static method: diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md index d2cdb9b2f4..d7beacfff3 100644 --- a/docs/password_auth_providers.md +++ b/docs/password_auth_providers.md @@ -1,3 +1,9 @@ +<h2 style="color:red"> +This page of the Synapse documentation is now deprecated. For up to date +documentation on setting up or writing a password auth provider module, please see +<a href="modules.md">this page</a>. +</h2> + # Password auth provider modules Password auth providers offer a way for server administrators to diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 166cec38d3..b90ed62d61 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -472,6 +472,48 @@ limit_remote_rooms: # #user_ips_max_age: 14d +# Inhibits the /requestToken endpoints from returning an error that might leak +# information about whether an e-mail address is in use or not on this +# homeserver. +# Note that for some endpoints the error situation is the e-mail already being +# used, and for others the error is entering the e-mail being unused. +# If this option is enabled, instead of returning an error, these endpoints will +# act as if no error happened and return a fake session ID ('sid') to clients. +# +#request_token_inhibit_3pid_errors: true + +# A list of domains that the domain portion of 'next_link' parameters +# must match. +# +# This parameter is optionally provided by clients while requesting +# validation of an email or phone number, and maps to a link that +# users will be automatically redirected to after validation +# succeeds. Clients can make use this parameter to aid the validation +# process. +# +# The whitelist is applied whether the homeserver or an +# identity server is handling validation. +# +# The default value is no whitelist functionality; all domains are +# allowed. Setting this value to an empty list will instead disallow +# all domains. +# +#next_link_domain_whitelist: ["matrix.org"] + +# Templates to use when generating email or HTML page contents. +# +templates: + # Directory in which Synapse will try to find template files to use to generate + # email or HTML page contents. + # If not set, or a file is not found within the template directory, a default + # template from within the Synapse package will be used. + # + # See https://matrix-org.github.io/synapse/latest/templates.html for more + # information about using custom templates. + # + #custom_template_directory: /path/to/custom/templates/ + + # Message retention policy at the server level. # # Room admins and mods can define a retention period for their rooms using the @@ -541,47 +583,6 @@ retention: # - shortest_max_lifetime: 3d # interval: 1d -# Inhibits the /requestToken endpoints from returning an error that might leak -# information about whether an e-mail address is in use or not on this -# homeserver. -# Note that for some endpoints the error situation is the e-mail already being -# used, and for others the error is entering the e-mail being unused. -# If this option is enabled, instead of returning an error, these endpoints will -# act as if no error happened and return a fake session ID ('sid') to clients. -# -#request_token_inhibit_3pid_errors: true - -# A list of domains that the domain portion of 'next_link' parameters -# must match. -# -# This parameter is optionally provided by clients while requesting -# validation of an email or phone number, and maps to a link that -# users will be automatically redirected to after validation -# succeeds. Clients can make use this parameter to aid the validation -# process. -# -# The whitelist is applied whether the homeserver or an -# identity server is handling validation. -# -# The default value is no whitelist functionality; all domains are -# allowed. Setting this value to an empty list will instead disallow -# all domains. -# -#next_link_domain_whitelist: ["matrix.org"] - -# Templates to use when generating email or HTML page contents. -# -templates: - # Directory in which Synapse will try to find template files to use to generate - # email or HTML page contents. - # If not set, or a file is not found within the template directory, a default - # template from within the Synapse package will be used. - # - # See https://matrix-org.github.io/synapse/latest/templates.html for more - # information about using custom templates. - # - #custom_template_directory: /path/to/custom/templates/ - ## TLS ## @@ -2260,34 +2261,6 @@ email: #email_validation: "[%(server_name)s] Validate your email" -# Password providers allow homeserver administrators to integrate -# their Synapse installation with existing authentication methods -# ex. LDAP, external tokens, etc. -# -# For more information and known implementations, please see -# https://matrix-org.github.io/synapse/latest/password_auth_providers.html -# -# Note: instances wishing to use SAML or CAS authentication should -# instead use the `saml2_config` or `cas_config` options, -# respectively. -# -password_providers: -# # Example config for an LDAP auth provider -# - module: "ldap_auth_provider.LdapAuthProvider" -# config: -# enabled: true -# uri: "ldap://ldap.example.com:389" -# start_tls: true -# base: "ou=users,dc=example,dc=com" -# attributes: -# uid: "cn" -# mail: "email" -# name: "givenName" -# #bind_dn: -# #bind_password: -# #filter: "(objectClass=posixAccount)" - - ## Push ## diff --git a/docs/upgrade.md b/docs/upgrade.md index 18ecb2678e..c47eef1a20 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -187,8 +187,8 @@ of this endpoint modifying the media store. The current third-party rules module interface is deprecated in favour of the new generic modules system introduced in Synapse v1.37.0. Authors of third-party rules modules can refer -to [this documentation](modules.md#porting-an-existing-module-that-uses-the-old-interface) -to update their modules. Synapse administrators can refer to [this documentation](modules.md#using-modules) +to [this documentation](modules/porting_legacy_module.md) +to update their modules. Synapse administrators can refer to [this documentation](modules/index.md) to update their configuration once the modules they are using have been updated. We plan to remove support for the current third-party rules interface in September 2021. @@ -237,9 +237,9 @@ SQLite databases are unaffected by this change. The current spam checker interface is deprecated in favour of a new generic modules system. Authors of spam checker modules can refer to [this -documentation](modules.md#porting-an-existing-module-that-uses-the-old-interface) +documentation](modules/porting_legacy_module.md to update their modules. Synapse administrators can refer to [this -documentation](modules.md#using-modules) +documentation](modules/index.md) to update their configuration once the modules they are using have been updated. We plan to remove support for the current spam checker interface in August 2021. @@ -348,24 +348,24 @@ Please ensure your Application Services are up to date. ## Requirement for X-Forwarded-Proto header When using Synapse with a reverse proxy (in particular, when using the -[x_forwarded]{.title-ref} option on an HTTP listener), Synapse now -expects to receive an [X-Forwarded-Proto]{.title-ref} header on incoming +`x_forwarded` option on an HTTP listener), Synapse now +expects to receive an `X-Forwarded-Proto` header on incoming HTTP requests. If it is not set, Synapse will log a warning on each received request. To avoid the warning, administrators using a reverse proxy should ensure -that the reverse proxy sets [X-Forwarded-Proto]{.title-ref} header to -[https]{.title-ref} or [http]{.title-ref} to indicate the protocol used +that the reverse proxy sets `X-Forwarded-Proto` header to +`https` or `http` to indicate the protocol used by the client. -Synapse also requires the [Host]{.title-ref} header to be preserved. +Synapse also requires the `Host` header to be preserved. See the [reverse proxy documentation](reverse_proxy.md), where the example configurations have been updated to show how to set these headers. (Users of [Caddy](https://caddyserver.com/) are unaffected, since we -believe it sets [X-Forwarded-Proto]{.title-ref} by default.) +believe it sets `X-Forwarded-Proto` by default.) # Upgrading to v1.27.0 @@ -529,13 +529,13 @@ mapping provider to specify different algorithms, instead of the way](<https://matrix.org/docs/spec/appendices#mapping-from-other-character-sets>). If your Synapse configuration uses a custom mapping provider -([oidc_config.user_mapping_provider.module]{.title-ref} is specified and +(`oidc_config.user_mapping_provider.module` is specified and not equal to -[synapse.handlers.oidc_handler.JinjaOidcMappingProvider]{.title-ref}) -then you *must* ensure that [map_user_attributes]{.title-ref} of the +`synapse.handlers.oidc_handler.JinjaOidcMappingProvider`) +then you *must* ensure that `map_user_attributes` of the mapping provider performs some normalisation of the -[localpart]{.title-ref} returned. To match previous behaviour you can -use the [map_username_to_mxid_localpart]{.title-ref} function provided +`localpart` returned. To match previous behaviour you can +use the `map_username_to_mxid_localpart` function provided by Synapse. An example is shown below: ```python @@ -564,7 +564,7 @@ v1.24.0. The Admin API is now only accessible under: - `/_synapse/admin/v1` -The only exception is the [/admin/whois]{.title-ref} endpoint, which is +The only exception is the `/admin/whois` endpoint, which is [also available via the client-server API](https://matrix.org/docs/spec/client_server/r0.6.1#get-matrix-client-r0-admin-whois-userid). @@ -639,7 +639,7 @@ This page will appear to the user after clicking a password reset link that has been emailed to them. To complete password reset, the page must include a way to make a -[POST]{.title-ref} request to +`POST` request to `/_synapse/client/password_reset/{medium}/submit_token` with the query parameters from the original link, presented as a URL-encoded form. See the file itself for more details. @@ -660,18 +660,18 @@ but the parameters are slightly different: # Upgrading to v1.18.0 -## Docker [-py3]{.title-ref} suffix will be removed in future versions +## Docker `-py3` suffix will be removed in future versions From 10th August 2020, we will no longer publish Docker images with the -[-py3]{.title-ref} tag suffix. The images tagged with the -[-py3]{.title-ref} suffix have been identical to the non-suffixed tags +`-py3` tag suffix. The images tagged with the +`-py3` suffix have been identical to the non-suffixed tags since release 0.99.0, and the suffix is obsolete. -On 10th August, we will remove the [latest-py3]{.title-ref} tag. -Existing per-release tags (such as [v1.18.0-py3]{.title-ref}) will not -be removed, but no new [-py3]{.title-ref} tags will be added. +On 10th August, we will remove the `latest-py3` tag. +Existing per-release tags (such as `v1.18.0-py3` will not +be removed, but no new `-py3` tags will be added. -Scripts relying on the [-py3]{.title-ref} suffix will need to be +Scripts relying on the `-py3` suffix will need to be updated. ## Redis replication is now recommended in lieu of TCP replication @@ -705,8 +705,8 @@ This will *not* be a problem for Synapse installations which were: If completeness of the room directory is a concern, installations which are affected can be repaired as follows: -1. Run the following sql from a [psql]{.title-ref} or - [sqlite3]{.title-ref} console: +1. Run the following sql from a `psql` or + `sqlite3` console: ```sql INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES @@ -770,8 +770,8 @@ participating in many rooms. of any problems. 1. As an initial check to see if you will be affected, you can try - running the following query from the [psql]{.title-ref} or - [sqlite3]{.title-ref} console. It is safe to run it while Synapse is + running the following query from the `psql` or + `sqlite3` console. It is safe to run it while Synapse is still running. ```sql @@ -1353,9 +1353,9 @@ first need to upgrade the database by running: python scripts/upgrade_db_to_v0.6.0.py <db> <server_name> <signing_key> -Where [<db>]{.title-ref} is the location of the database, -[<server_name>]{.title-ref} is the server name as specified in the -synapse configuration, and [<signing_key>]{.title-ref} is the location +Where `<db>` is the location of the database, +`<server_name>` is the server name as specified in the +synapse configuration, and `<signing_key>` is the location of the signing key as specified in the synapse configuration. This may take some time to complete. Failures of signatures and content diff --git a/docs/usage/administration/admin_api/registration_tokens.md b/docs/usage/administration/admin_api/registration_tokens.md index c48d060dcc..13d5eb75e9 100644 --- a/docs/usage/administration/admin_api/registration_tokens.md +++ b/docs/usage/administration/admin_api/registration_tokens.md @@ -149,7 +149,7 @@ POST /_synapse/admin/v1/registration_tokens/new The request body must be a JSON object and can contain the following fields: - `token`: The registration token. A string of no more than 64 characters that - consists only of characters matched by the regex `[A-Za-z0-9-_]`. + consists only of characters matched by the regex `[A-Za-z0-9._~-]`. Default: randomly generated. - `uses_allowed`: The integer number of times the token can be used to complete a registration before it becomes invalid. diff --git a/docs/usage/configuration/logging_sample_config.md b/docs/usage/configuration/logging_sample_config.md index a673d487b8..499ab7cfe5 100644 --- a/docs/usage/configuration/logging_sample_config.md +++ b/docs/usage/configuration/logging_sample_config.md @@ -2,13 +2,13 @@ Below is a sample logging configuration file. This file can be tweaked to control how your homeserver will output logs. A restart of the server is generally required to apply any -changes made to this file. +changes made to this file. The value of the `log_config` option in your homeserver +config should be the path to this file. -Note that the contents below are *not* intended to be copied and used as the basis for -a real homeserver.yaml. Instead, if you are starting from scratch, please generate -a fresh config using Synapse by following the instructions in -[Installation](../../setup/installation.md). +Note that a default logging configuration (shown below) is created automatically alongside +the homeserver config when following the [installation instructions](../../setup/installation.md). +It should be named `<SERVERNAME>.log.config` by default. ```yaml {{#include ../../sample_log_config.yaml}} -``` \ No newline at end of file +``` diff --git a/mypy.ini b/mypy.ini index a7019e2bd4..14d8bb8eaf 100644 --- a/mypy.ini +++ b/mypy.ini @@ -22,8 +22,11 @@ files = synapse/crypto, synapse/event_auth.py, synapse/events/builder.py, + synapse/events/presence_router.py, + synapse/events/snapshot.py, synapse/events/spamcheck.py, synapse/events/third_party_rules.py, + synapse/events/utils.py, synapse/events/validator.py, synapse/federation, synapse/groups, @@ -53,6 +56,7 @@ files = synapse/storage/_base.py, synapse/storage/background_updates.py, synapse/storage/databases/main/appservice.py, + synapse/storage/databases/main/client_ips.py, synapse/storage/databases/main/events.py, synapse/storage/databases/main/keys.py, synapse/storage/databases/main/pusher.py, @@ -88,11 +92,20 @@ files = tests/handlers/test_user_directory.py, tests/rest/client/test_login.py, tests/rest/client/test_auth.py, + tests/rest/client/test_relations.py, + tests/rest/media/v1/test_filepath.py, + tests/rest/media/v1/test_oembed.py, tests/storage/test_state.py, tests/storage/test_user_directory.py, tests/util/test_itertools.py, tests/util/test_stream_change_cache.py +[mypy-synapse.api.*] +disallow_untyped_defs = True + +[mypy-synapse.events.*] +disallow_untyped_defs = True + [mypy-synapse.handlers.*] disallow_untyped_defs = True @@ -108,6 +121,9 @@ disallow_untyped_defs = True [mypy-synapse.state.*] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.client_ips] +disallow_untyped_defs = True + [mypy-synapse.storage.util.*] disallow_untyped_defs = True diff --git a/scripts-dev/build_debian_packages b/scripts-dev/build_debian_packages index e9f89e38ef..3a9a2d257c 100755 --- a/scripts-dev/build_debian_packages +++ b/scripts-dev/build_debian_packages @@ -27,6 +27,7 @@ DISTS = ( "ubuntu:bionic", # 18.04 LTS (our EOL forced by Py36 on 2021-12-23) "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) "ubuntu:hirsute", # 21.04 (EOL 2022-01-05) + "ubuntu:impish", # 21.10 (EOL 2022-07) ) DESC = """\ diff --git a/synapse/api/auth.py b/synapse/api/auth.py index e6ca9232ee..44883c6663 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -245,7 +245,7 @@ class Auth: async def validate_appservice_can_control_user_id( self, app_service: ApplicationService, user_id: str - ): + ) -> None: """Validates that the app service is allowed to control the given user. @@ -618,5 +618,13 @@ class Auth: % (user_id, room_id), ) - async def check_auth_blocking(self, *args, **kwargs) -> None: - await self._auth_blocking.check_auth_blocking(*args, **kwargs) + async def check_auth_blocking( + self, + user_id: Optional[str] = None, + threepid: Optional[dict] = None, + user_type: Optional[str] = None, + requester: Optional[Requester] = None, + ) -> None: + await self._auth_blocking.check_auth_blocking( + user_id=user_id, threepid=threepid, user_type=user_type, requester=requester + ) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 9480f448d7..685d1c25cf 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -18,7 +18,7 @@ import logging import typing from http import HTTPStatus -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union from twisted.web import http @@ -143,7 +143,7 @@ class SynapseError(CodeMessageException): super().__init__(code, msg) self.errcode = errcode - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode) @@ -175,7 +175,7 @@ class ProxiedRequestError(SynapseError): else: self._additional_fields = dict(additional_fields) - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, **self._additional_fields) @@ -196,7 +196,7 @@ class ConsentNotGivenError(SynapseError): ) self._consent_uri = consent_uri - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, consent_uri=self._consent_uri) @@ -262,14 +262,10 @@ class InteractiveAuthIncompleteError(Exception): class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.UNRECOGNIZED - if len(args) == 0: - message = "Unrecognized request" - else: - message = args[0] - super().__init__(400, message, **kwargs) + def __init__( + self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED + ): + super().__init__(400, msg, errcode) class NotFoundError(SynapseError): @@ -284,10 +280,8 @@ class AuthError(SynapseError): other poorly-defined times. """ - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.FORBIDDEN - super().__init__(*args, **kwargs) + def __init__(self, code: int, msg: str, errcode: str = Codes.FORBIDDEN): + super().__init__(code, msg, errcode) class InvalidClientCredentialsError(SynapseError): @@ -321,7 +315,7 @@ class InvalidClientTokenError(InvalidClientCredentialsError): super().__init__(msg=msg, errcode="M_UNKNOWN_TOKEN") self._soft_logout = soft_logout - def error_dict(self): + def error_dict(self) -> "JsonDict": d = super().error_dict() d["soft_logout"] = self._soft_logout return d @@ -345,7 +339,7 @@ class ResourceLimitError(SynapseError): self.limit_type = limit_type super().__init__(code, msg, errcode=errcode) - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error( self.msg, self.errcode, @@ -357,32 +351,17 @@ class ResourceLimitError(SynapseError): class EventSizeError(SynapseError): """An error raised when an event is too big.""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.TOO_LARGE - super().__init__(413, *args, **kwargs) - - -class EventStreamError(SynapseError): - """An error raised when there a problem with the event stream.""" - - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.BAD_PAGINATION - super().__init__(*args, **kwargs) + def __init__(self, msg: str): + super().__init__(413, msg, Codes.TOO_LARGE) class LoginError(SynapseError): """An error raised when there was a problem logging in.""" - pass - class StoreError(SynapseError): """An error raised when there was a problem storing some data.""" - pass - class InvalidCaptchaError(SynapseError): def __init__( @@ -395,7 +374,7 @@ class InvalidCaptchaError(SynapseError): super().__init__(code, msg, errcode) self.error_url = error_url - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, error_url=self.error_url) @@ -412,7 +391,7 @@ class LimitExceededError(SynapseError): super().__init__(code, msg, errcode) self.retry_after_ms = retry_after_ms - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, retry_after_ms=self.retry_after_ms) @@ -443,10 +422,8 @@ class UnsupportedRoomVersionError(SynapseError): class ThreepidValidationError(SynapseError): """An error raised when there was a problem authorising an event.""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.FORBIDDEN - super().__init__(*args, **kwargs) + def __init__(self, msg: str, errcode: str = Codes.FORBIDDEN): + super().__init__(400, msg, errcode) class IncompatibleRoomVersionError(SynapseError): @@ -466,7 +443,7 @@ class IncompatibleRoomVersionError(SynapseError): self._room_version = room_version - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, room_version=self._room_version) @@ -494,7 +471,7 @@ class RequestSendFailed(RuntimeError): errors (like programming errors). """ - def __init__(self, inner_exception, can_retry): + def __init__(self, inner_exception: BaseException, can_retry: bool): super().__init__( "Failed to send request: %s: %s" % (type(inner_exception).__name__, inner_exception) @@ -503,7 +480,7 @@ class RequestSendFailed(RuntimeError): self.can_retry = can_retry -def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs): +def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs: Any) -> "JsonDict": """Utility method for constructing an error response for client-server interactions. @@ -551,7 +528,7 @@ class FederationError(RuntimeError): msg = "%s %s: %s" % (level, code, reason) super().__init__(msg) - def get_dict(self): + def get_dict(self) -> "JsonDict": return { "level": self.level, "code": self.code, @@ -580,7 +557,7 @@ class HttpResponseException(CodeMessageException): super().__init__(code, msg) self.response = response - def to_synapse_error(self): + def to_synapse_error(self) -> SynapseError: """Make a SynapseError based on an HTTPResponseException This is useful when a proxied request has failed, and we need to diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 20e91a115d..bc550ae646 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -231,24 +231,24 @@ class FilterCollection: def include_redundant_members(self) -> bool: return self._room_state_filter.include_redundant_members() - def filter_presence(self, events): + def filter_presence( + self, events: Iterable[UserPresenceState] + ) -> List[UserPresenceState]: return self._presence_filter.filter(events) - def filter_account_data(self, events): + def filter_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]: return self._account_data.filter(events) - def filter_room_state(self, events): + def filter_room_state(self, events: Iterable[EventBase]) -> List[EventBase]: return self._room_state_filter.filter(self._room_filter.filter(events)) - def filter_room_timeline(self, events: Iterable[FilterEvent]) -> List[FilterEvent]: + def filter_room_timeline(self, events: Iterable[EventBase]) -> List[EventBase]: return self._room_timeline_filter.filter(self._room_filter.filter(events)) - def filter_room_ephemeral(self, events: Iterable[FilterEvent]) -> List[FilterEvent]: + def filter_room_ephemeral(self, events: Iterable[JsonDict]) -> List[JsonDict]: return self._room_ephemeral_filter.filter(self._room_filter.filter(events)) - def filter_room_account_data( - self, events: Iterable[FilterEvent] - ) -> List[FilterEvent]: + def filter_room_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]: return self._room_account_data.filter(self._room_filter.filter(events)) def blocks_all_presence(self) -> bool: @@ -309,7 +309,7 @@ class Filter: # except for presence which actually gets passed around as its own # namedtuple type. if isinstance(event, UserPresenceState): - sender = event.user_id + sender: Optional[str] = event.user_id room_id = None ev_type = "m.presence" contains_url = False diff --git a/synapse/api/presence.py b/synapse/api/presence.py index a3bf0348d1..b80aa83cb3 100644 --- a/synapse/api/presence.py +++ b/synapse/api/presence.py @@ -12,49 +12,48 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import namedtuple +from typing import Any, Optional + +import attr from synapse.api.constants import PresenceState +from synapse.types import JsonDict -class UserPresenceState( - namedtuple( - "UserPresenceState", - ( - "user_id", - "state", - "last_active_ts", - "last_federation_update_ts", - "last_user_sync_ts", - "status_msg", - "currently_active", - ), - ) -): +@attr.s(slots=True, frozen=True, auto_attribs=True) +class UserPresenceState: """Represents the current presence state of the user. - user_id (str) - last_active (int): Time in msec that the user last interacted with server. - last_federation_update (int): Time in msec since either a) we sent a presence + user_id + last_active: Time in msec that the user last interacted with server. + last_federation_update: Time in msec since either a) we sent a presence update to other servers or b) we received a presence update, depending on if is a local user or not. - last_user_sync (int): Time in msec that the user last *completed* a sync + last_user_sync: Time in msec that the user last *completed* a sync (or event stream). - status_msg (str): User set status message. + status_msg: User set status message. """ - def as_dict(self): - return dict(self._asdict()) + user_id: str + state: str + last_active_ts: int + last_federation_update_ts: int + last_user_sync_ts: int + status_msg: Optional[str] + currently_active: bool + + def as_dict(self) -> JsonDict: + return attr.asdict(self) @staticmethod - def from_dict(d): + def from_dict(d: JsonDict) -> "UserPresenceState": return UserPresenceState(**d) - def copy_and_replace(self, **kwargs): - return self._replace(**kwargs) + def copy_and_replace(self, **kwargs: Any) -> "UserPresenceState": + return attr.evolve(self, **kwargs) @classmethod - def default(cls, user_id): + def default(cls, user_id: str) -> "UserPresenceState": """Returns a default presence state.""" return cls( user_id=user_id, diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index e8964097d3..849c18ceda 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -161,7 +161,7 @@ class Ratelimiter: return allowed, time_allowed - def _prune_message_counts(self, time_now_s: float): + def _prune_message_counts(self, time_now_s: float) -> None: """Remove message count entries that have not exceeded their defined rate_hz limit @@ -190,7 +190,7 @@ class Ratelimiter: update: bool = True, n_actions: int = 1, _time_now_s: Optional[float] = None, - ): + ) -> None: """Checks if an action can be performed. If not, raises a LimitExceededError Checks if the user has ratelimiting disabled in the database by looking diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 032c69b210..6e84b1524f 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -19,6 +19,7 @@ from hashlib import sha256 from urllib.parse import urlencode from synapse.config import ConfigError +from synapse.config.homeserver import HomeServerConfig SYNAPSE_CLIENT_API_PREFIX = "/_synapse/client" CLIENT_API_PREFIX = "/_matrix/client" @@ -34,11 +35,7 @@ LEGACY_MEDIA_PREFIX = "/_matrix/media/v1" class ConsentURIBuilder: - def __init__(self, hs_config): - """ - Args: - hs_config (synapse.config.homeserver.HomeServerConfig): - """ + def __init__(self, hs_config: HomeServerConfig): if hs_config.key.form_secret is None: raise ConfigError("form_secret not set in config") if hs_config.server.public_baseurl is None: @@ -47,15 +44,15 @@ class ConsentURIBuilder: self._hmac_secret = hs_config.key.form_secret.encode("utf-8") self._public_baseurl = hs_config.server.public_baseurl - def build_user_consent_uri(self, user_id): + def build_user_consent_uri(self, user_id: str) -> str: """Build a URI which we can give to the user to do their privacy policy consent Args: - user_id (str): mxid or username of user + user_id: mxid or username of user Returns - (str) the URI where the user can do consent + The URI where the user can do consent """ mac = hmac.new( key=self._hmac_secret, msg=user_id.encode("ascii"), digestmod=sha256 diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 4a204a5823..bb4d53d778 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -42,6 +42,7 @@ from synapse.crypto import context_factory from synapse.events.presence_router import load_legacy_presence_router from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.events.third_party_rules import load_legacy_third_party_event_rules +from synapse.handlers.auth import load_legacy_password_auth_providers from synapse.logging.context import PreserveLoggingContext from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.jemalloc import setup_jemalloc_stats @@ -379,6 +380,7 @@ async def start(hs: "HomeServer"): load_legacy_spam_checkers(hs) load_legacy_third_party_event_rules(hs) load_legacy_presence_router(hs) + load_legacy_password_auth_providers(hs) # If we've configured an expiry time for caches, start the background job now. setup_expire_lru_cache_entries(hs) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 13d20af457..b156b93bf3 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -39,6 +39,7 @@ from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.server import HomeServer +from synapse.storage.databases.main.room import RoomWorkerStore from synapse.util.logcontext import LoggingContext from synapse.util.versionstring import get_version_string @@ -58,6 +59,7 @@ class AdminCmdSlavedStore( SlavedEventStore, SlavedClientIpStore, BaseSlavedStore, + RoomWorkerStore, ): pass @@ -185,11 +187,7 @@ def start(config_options): # a full worker config. config.worker.worker_app = "synapse.app.admin_cmd" - if ( - not config.worker.worker_daemonize - and not config.worker.worker_log_file - and not config.worker.worker_log_config - ): + if not config.worker.worker_daemonize and not config.worker.worker_log_config: # Since we're meant to be run as a "command" let's not redirect stdio # unless we've actually set log config. config.logging.no_redirect_stdio = True @@ -198,9 +196,9 @@ def start(config_options): config.server.update_user_directory = False config.worker.run_background_tasks = False config.worker.start_pushers = False - config.pusher_shard_config.instances = [] + config.worker.pusher_shard_config.instances = [] config.worker.send_federation = False - config.federation_shard_config.instances = [] + config.worker.federation_shard_config.instances = [] synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts @@ -221,7 +219,7 @@ def start(config_options): async def run(): with LoggingContext("command"): - _base.start(ss) + await _base.start(ss) await args.func(ss, args) _base.start_worker_reactor( diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 06fbd1166b..c1d9069798 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -26,6 +26,7 @@ from synapse.config import ( redis, registration, repository, + retention, room_directory, saml2, server, @@ -91,6 +92,7 @@ class RootConfig: modules: modules.ModulesConfig caches: cache.CacheConfig federation: federation.FederationConfig + retention: retention.RetentionConfig config_classes: List = ... def __init__(self) -> None: ... diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 7b0381c06a..b013a3918c 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -24,6 +24,9 @@ class ExperimentalConfig(Config): def read_config(self, config: JsonDict, **kwargs): experimental = config.get("experimental_features") or {} + # Whether to enable experimental MSC1849 (aka relations) support + self.msc1849_enabled = config.get("experimental_msc1849_support_enabled", True) + # MSC3026 (busy presence state) self.msc3026_enabled: bool = experimental.get("msc3026_enabled", False) diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 442f1b9ac0..001605c265 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -38,6 +38,7 @@ from .ratelimiting import RatelimitConfig from .redis import RedisConfig from .registration import RegistrationConfig from .repository import ContentRepositoryConfig +from .retention import RetentionConfig from .room import RoomConfig from .room_directory import RoomDirectoryConfig from .saml2 import SAML2Config @@ -59,6 +60,7 @@ class HomeServerConfig(RootConfig): config_classes = [ ModulesConfig, ServerConfig, + RetentionConfig, TlsConfig, FederationConfig, CacheConfig, diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 83994df798..f980102b45 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -25,6 +25,29 @@ class PasswordAuthProviderConfig(Config): section = "authproviders" def read_config(self, config, **kwargs): + """Parses the old password auth providers config. The config format looks like this: + + password_providers: + # Example config for an LDAP auth provider + - module: "ldap_auth_provider.LdapAuthProvider" + config: + enabled: true + uri: "ldap://ldap.example.com:389" + start_tls: true + base: "ou=users,dc=example,dc=com" + attributes: + uid: "cn" + mail: "email" + name: "givenName" + #bind_dn: + #bind_password: + #filter: "(objectClass=posixAccount)" + + We expect admins to use modules for this feature (which is why it doesn't appear + in the sample config file), but we want to keep support for it around for a bit + for backwards compatibility. + """ + self.password_providers: List[Tuple[Type, Any]] = [] providers = [] @@ -49,33 +72,3 @@ class PasswordAuthProviderConfig(Config): ) self.password_providers.append((provider_class, provider_config)) - - def generate_config_section(self, **kwargs): - return """\ - # Password providers allow homeserver administrators to integrate - # their Synapse installation with existing authentication methods - # ex. LDAP, external tokens, etc. - # - # For more information and known implementations, please see - # https://matrix-org.github.io/synapse/latest/password_auth_providers.html - # - # Note: instances wishing to use SAML or CAS authentication should - # instead use the `saml2_config` or `cas_config` options, - # respectively. - # - password_providers: - # # Example config for an LDAP auth provider - # - module: "ldap_auth_provider.LdapAuthProvider" - # config: - # enabled: true - # uri: "ldap://ldap.example.com:389" - # start_tls: true - # base: "ou=users,dc=example,dc=com" - # attributes: - # uid: "cn" - # mail: "email" - # name: "givenName" - # #bind_dn: - # #bind_password: - # #filter: "(objectClass=posixAccount)" - """ diff --git a/synapse/config/retention.py b/synapse/config/retention.py new file mode 100644 index 0000000000..aed9bf458f --- /dev/null +++ b/synapse/config/retention.py @@ -0,0 +1,226 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import List, Optional + +import attr + +from synapse.config._base import Config, ConfigError + +logger = logging.getLogger(__name__) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class RetentionPurgeJob: + """Object describing the configuration of the manhole""" + + interval: int + shortest_max_lifetime: Optional[int] + longest_max_lifetime: Optional[int] + + +class RetentionConfig(Config): + section = "retention" + + def read_config(self, config, **kwargs): + retention_config = config.get("retention") + if retention_config is None: + retention_config = {} + + self.retention_enabled = retention_config.get("enabled", False) + + retention_default_policy = retention_config.get("default_policy") + + if retention_default_policy is not None: + self.retention_default_min_lifetime = retention_default_policy.get( + "min_lifetime" + ) + if self.retention_default_min_lifetime is not None: + self.retention_default_min_lifetime = self.parse_duration( + self.retention_default_min_lifetime + ) + + self.retention_default_max_lifetime = retention_default_policy.get( + "max_lifetime" + ) + if self.retention_default_max_lifetime is not None: + self.retention_default_max_lifetime = self.parse_duration( + self.retention_default_max_lifetime + ) + + if ( + self.retention_default_min_lifetime is not None + and self.retention_default_max_lifetime is not None + and ( + self.retention_default_min_lifetime + > self.retention_default_max_lifetime + ) + ): + raise ConfigError( + "The default retention policy's 'min_lifetime' can not be greater" + " than its 'max_lifetime'" + ) + else: + self.retention_default_min_lifetime = None + self.retention_default_max_lifetime = None + + if self.retention_enabled: + logger.info( + "Message retention policies support enabled with the following default" + " policy: min_lifetime = %s ; max_lifetime = %s", + self.retention_default_min_lifetime, + self.retention_default_max_lifetime, + ) + + self.retention_allowed_lifetime_min = retention_config.get( + "allowed_lifetime_min" + ) + if self.retention_allowed_lifetime_min is not None: + self.retention_allowed_lifetime_min = self.parse_duration( + self.retention_allowed_lifetime_min + ) + + self.retention_allowed_lifetime_max = retention_config.get( + "allowed_lifetime_max" + ) + if self.retention_allowed_lifetime_max is not None: + self.retention_allowed_lifetime_max = self.parse_duration( + self.retention_allowed_lifetime_max + ) + + if ( + self.retention_allowed_lifetime_min is not None + and self.retention_allowed_lifetime_max is not None + and self.retention_allowed_lifetime_min + > self.retention_allowed_lifetime_max + ): + raise ConfigError( + "Invalid retention policy limits: 'allowed_lifetime_min' can not be" + " greater than 'allowed_lifetime_max'" + ) + + self.retention_purge_jobs: List[RetentionPurgeJob] = [] + for purge_job_config in retention_config.get("purge_jobs", []): + interval_config = purge_job_config.get("interval") + + if interval_config is None: + raise ConfigError( + "A retention policy's purge jobs configuration must have the" + " 'interval' key set." + ) + + interval = self.parse_duration(interval_config) + + shortest_max_lifetime = purge_job_config.get("shortest_max_lifetime") + + if shortest_max_lifetime is not None: + shortest_max_lifetime = self.parse_duration(shortest_max_lifetime) + + longest_max_lifetime = purge_job_config.get("longest_max_lifetime") + + if longest_max_lifetime is not None: + longest_max_lifetime = self.parse_duration(longest_max_lifetime) + + if ( + shortest_max_lifetime is not None + and longest_max_lifetime is not None + and shortest_max_lifetime > longest_max_lifetime + ): + raise ConfigError( + "A retention policy's purge jobs configuration's" + " 'shortest_max_lifetime' value can not be greater than its" + " 'longest_max_lifetime' value." + ) + + self.retention_purge_jobs.append( + RetentionPurgeJob(interval, shortest_max_lifetime, longest_max_lifetime) + ) + + if not self.retention_purge_jobs: + self.retention_purge_jobs = [ + RetentionPurgeJob(self.parse_duration("1d"), None, None) + ] + + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + # Message retention policy at the server level. + # + # Room admins and mods can define a retention period for their rooms using the + # 'm.room.retention' state event, and server admins can cap this period by setting + # the 'allowed_lifetime_min' and 'allowed_lifetime_max' config options. + # + # If this feature is enabled, Synapse will regularly look for and purge events + # which are older than the room's maximum retention period. Synapse will also + # filter events received over federation so that events that should have been + # purged are ignored and not stored again. + # + retention: + # The message retention policies feature is disabled by default. Uncomment the + # following line to enable it. + # + #enabled: true + + # Default retention policy. If set, Synapse will apply it to rooms that lack the + # 'm.room.retention' state event. Currently, the value of 'min_lifetime' doesn't + # matter much because Synapse doesn't take it into account yet. + # + #default_policy: + # min_lifetime: 1d + # max_lifetime: 1y + + # Retention policy limits. If set, and the state of a room contains a + # 'm.room.retention' event in its state which contains a 'min_lifetime' or a + # 'max_lifetime' that's out of these bounds, Synapse will cap the room's policy + # to these limits when running purge jobs. + # + #allowed_lifetime_min: 1d + #allowed_lifetime_max: 1y + + # Server admins can define the settings of the background jobs purging the + # events which lifetime has expired under the 'purge_jobs' section. + # + # If no configuration is provided, a single job will be set up to delete expired + # events in every room daily. + # + # Each job's configuration defines which range of message lifetimes the job + # takes care of. For example, if 'shortest_max_lifetime' is '2d' and + # 'longest_max_lifetime' is '3d', the job will handle purging expired events in + # rooms whose state defines a 'max_lifetime' that's both higher than 2 days, and + # lower than or equal to 3 days. Both the minimum and the maximum value of a + # range are optional, e.g. a job with no 'shortest_max_lifetime' and a + # 'longest_max_lifetime' of '3d' will handle every room with a retention policy + # which 'max_lifetime' is lower than or equal to three days. + # + # The rationale for this per-job configuration is that some rooms might have a + # retention policy with a low 'max_lifetime', where history needs to be purged + # of outdated messages on a more frequent basis than for the rest of the rooms + # (e.g. every 12h), but not want that purge to be performed by a job that's + # iterating over every room it knows, which could be heavy on the server. + # + # If any purge job is configured, it is strongly recommended to have at least + # a single job with neither 'shortest_max_lifetime' nor 'longest_max_lifetime' + # set, or one job without 'shortest_max_lifetime' and one job without + # 'longest_max_lifetime' set. Otherwise some rooms might be ignored, even if + # 'allowed_lifetime_min' and 'allowed_lifetime_max' are set, because capping a + # room's policy to these values is done after the policies are retrieved from + # Synapse's database (which is done using the range specified in a purge job's + # configuration). + # + #purge_jobs: + # - longest_max_lifetime: 3d + # interval: 12h + # - shortest_max_lifetime: 3d + # interval: 1d + """ diff --git a/synapse/config/server.py b/synapse/config/server.py index 818b806357..ed094bdc44 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -225,15 +225,6 @@ class ManholeConfig: pub_key: Optional[Key] -@attr.s(slots=True, frozen=True, auto_attribs=True) -class RetentionConfig: - """Object describing the configuration of the manhole""" - - interval: int - shortest_max_lifetime: Optional[int] - longest_max_lifetime: Optional[int] - - @attr.s(frozen=True) class LimitRemoteRoomsConfig: enabled: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -376,11 +367,6 @@ class ServerConfig(Config): # (other than those sent by local server admins) self.block_non_admin_invites = config.get("block_non_admin_invites", False) - # Whether to enable experimental MSC1849 (aka relations) support - self.experimental_msc1849_support_enabled = config.get( - "experimental_msc1849_support_enabled", True - ) - # Options to control access by tracking MAU self.limit_usage_by_mau = config.get("limit_usage_by_mau", False) self.max_mau_value = 0 @@ -466,124 +452,6 @@ class ServerConfig(Config): # events with profile information that differ from the target's global profile. self.allow_per_room_profiles = config.get("allow_per_room_profiles", True) - retention_config = config.get("retention") - if retention_config is None: - retention_config = {} - - self.retention_enabled = retention_config.get("enabled", False) - - retention_default_policy = retention_config.get("default_policy") - - if retention_default_policy is not None: - self.retention_default_min_lifetime = retention_default_policy.get( - "min_lifetime" - ) - if self.retention_default_min_lifetime is not None: - self.retention_default_min_lifetime = self.parse_duration( - self.retention_default_min_lifetime - ) - - self.retention_default_max_lifetime = retention_default_policy.get( - "max_lifetime" - ) - if self.retention_default_max_lifetime is not None: - self.retention_default_max_lifetime = self.parse_duration( - self.retention_default_max_lifetime - ) - - if ( - self.retention_default_min_lifetime is not None - and self.retention_default_max_lifetime is not None - and ( - self.retention_default_min_lifetime - > self.retention_default_max_lifetime - ) - ): - raise ConfigError( - "The default retention policy's 'min_lifetime' can not be greater" - " than its 'max_lifetime'" - ) - else: - self.retention_default_min_lifetime = None - self.retention_default_max_lifetime = None - - if self.retention_enabled: - logger.info( - "Message retention policies support enabled with the following default" - " policy: min_lifetime = %s ; max_lifetime = %s", - self.retention_default_min_lifetime, - self.retention_default_max_lifetime, - ) - - self.retention_allowed_lifetime_min = retention_config.get( - "allowed_lifetime_min" - ) - if self.retention_allowed_lifetime_min is not None: - self.retention_allowed_lifetime_min = self.parse_duration( - self.retention_allowed_lifetime_min - ) - - self.retention_allowed_lifetime_max = retention_config.get( - "allowed_lifetime_max" - ) - if self.retention_allowed_lifetime_max is not None: - self.retention_allowed_lifetime_max = self.parse_duration( - self.retention_allowed_lifetime_max - ) - - if ( - self.retention_allowed_lifetime_min is not None - and self.retention_allowed_lifetime_max is not None - and self.retention_allowed_lifetime_min - > self.retention_allowed_lifetime_max - ): - raise ConfigError( - "Invalid retention policy limits: 'allowed_lifetime_min' can not be" - " greater than 'allowed_lifetime_max'" - ) - - self.retention_purge_jobs: List[RetentionConfig] = [] - for purge_job_config in retention_config.get("purge_jobs", []): - interval_config = purge_job_config.get("interval") - - if interval_config is None: - raise ConfigError( - "A retention policy's purge jobs configuration must have the" - " 'interval' key set." - ) - - interval = self.parse_duration(interval_config) - - shortest_max_lifetime = purge_job_config.get("shortest_max_lifetime") - - if shortest_max_lifetime is not None: - shortest_max_lifetime = self.parse_duration(shortest_max_lifetime) - - longest_max_lifetime = purge_job_config.get("longest_max_lifetime") - - if longest_max_lifetime is not None: - longest_max_lifetime = self.parse_duration(longest_max_lifetime) - - if ( - shortest_max_lifetime is not None - and longest_max_lifetime is not None - and shortest_max_lifetime > longest_max_lifetime - ): - raise ConfigError( - "A retention policy's purge jobs configuration's" - " 'shortest_max_lifetime' value can not be greater than its" - " 'longest_max_lifetime' value." - ) - - self.retention_purge_jobs.append( - RetentionConfig(interval, shortest_max_lifetime, longest_max_lifetime) - ) - - if not self.retention_purge_jobs: - self.retention_purge_jobs = [ - RetentionConfig(self.parse_duration("1d"), None, None) - ] - self.listeners = [parse_listener_def(x) for x in config.get("listeners", [])] # no_tls is not really supported any more, but let's grandfather it in @@ -1255,75 +1123,6 @@ class ServerConfig(Config): # #user_ips_max_age: 14d - # Message retention policy at the server level. - # - # Room admins and mods can define a retention period for their rooms using the - # 'm.room.retention' state event, and server admins can cap this period by setting - # the 'allowed_lifetime_min' and 'allowed_lifetime_max' config options. - # - # If this feature is enabled, Synapse will regularly look for and purge events - # which are older than the room's maximum retention period. Synapse will also - # filter events received over federation so that events that should have been - # purged are ignored and not stored again. - # - retention: - # The message retention policies feature is disabled by default. Uncomment the - # following line to enable it. - # - #enabled: true - - # Default retention policy. If set, Synapse will apply it to rooms that lack the - # 'm.room.retention' state event. Currently, the value of 'min_lifetime' doesn't - # matter much because Synapse doesn't take it into account yet. - # - #default_policy: - # min_lifetime: 1d - # max_lifetime: 1y - - # Retention policy limits. If set, and the state of a room contains a - # 'm.room.retention' event in its state which contains a 'min_lifetime' or a - # 'max_lifetime' that's out of these bounds, Synapse will cap the room's policy - # to these limits when running purge jobs. - # - #allowed_lifetime_min: 1d - #allowed_lifetime_max: 1y - - # Server admins can define the settings of the background jobs purging the - # events which lifetime has expired under the 'purge_jobs' section. - # - # If no configuration is provided, a single job will be set up to delete expired - # events in every room daily. - # - # Each job's configuration defines which range of message lifetimes the job - # takes care of. For example, if 'shortest_max_lifetime' is '2d' and - # 'longest_max_lifetime' is '3d', the job will handle purging expired events in - # rooms whose state defines a 'max_lifetime' that's both higher than 2 days, and - # lower than or equal to 3 days. Both the minimum and the maximum value of a - # range are optional, e.g. a job with no 'shortest_max_lifetime' and a - # 'longest_max_lifetime' of '3d' will handle every room with a retention policy - # which 'max_lifetime' is lower than or equal to three days. - # - # The rationale for this per-job configuration is that some rooms might have a - # retention policy with a low 'max_lifetime', where history needs to be purged - # of outdated messages on a more frequent basis than for the rest of the rooms - # (e.g. every 12h), but not want that purge to be performed by a job that's - # iterating over every room it knows, which could be heavy on the server. - # - # If any purge job is configured, it is strongly recommended to have at least - # a single job with neither 'shortest_max_lifetime' nor 'longest_max_lifetime' - # set, or one job without 'shortest_max_lifetime' and one job without - # 'longest_max_lifetime' set. Otherwise some rooms might be ignored, even if - # 'allowed_lifetime_min' and 'allowed_lifetime_max' are set, because capping a - # room's policy to these values is done after the policies are retrieved from - # Synapse's database (which is done using the range specified in a purge job's - # configuration). - # - #purge_jobs: - # - longest_max_lifetime: 3d - # interval: 12h - # - shortest_max_lifetime: 3d - # interval: 1d - # Inhibits the /requestToken endpoints from returning an error that might leak # information about whether an e-mail address is in use or not on this # homeserver. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index ca0293a3dc..e885961698 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes @@ -113,7 +113,7 @@ def validate_event_for_room_version( def check_auth_rules_for_event( - room_version_obj: RoomVersion, event: EventBase, auth_events: StateMap[EventBase] + room_version_obj: RoomVersion, event: EventBase, auth_events: Iterable[EventBase] ) -> None: """Check that an event complies with the auth rules @@ -137,8 +137,6 @@ def check_auth_rules_for_event( Raises: AuthError if the checks fail """ - assert isinstance(auth_events, dict) - # We need to ensure that the auth events are actually for the same room, to # stop people from using powers they've been granted in other rooms for # example. @@ -147,7 +145,7 @@ def check_auth_rules_for_event( # the state res algorithm isn't silly enough to give us events from different rooms. # Still, it's easier to do it anyway. room_id = event.room_id - for auth_event in auth_events.values(): + for auth_event in auth_events: if auth_event.room_id != room_id: raise AuthError( 403, @@ -186,8 +184,10 @@ def check_auth_rules_for_event( logger.debug("Allowing! %s", event) return + auth_dict = {(e.type, e.state_key): e for e in auth_events} + # 3. If event does not have a m.room.create in its auth_events, reject. - creation_event = auth_events.get((EventTypes.Create, ""), None) + creation_event = auth_dict.get((EventTypes.Create, ""), None) if not creation_event: raise AuthError(403, "No create event in auth events") @@ -195,7 +195,7 @@ def check_auth_rules_for_event( creating_domain = get_domain_from_id(event.room_id) originating_domain = get_domain_from_id(event.sender) if creating_domain != originating_domain: - if not _can_federate(event, auth_events): + if not _can_federate(event, auth_dict): raise AuthError(403, "This room has been marked as unfederatable.") # 4. If type is m.room.aliases @@ -217,23 +217,20 @@ def check_auth_rules_for_event( logger.debug("Allowing! %s", event) return - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Auth events: %s", [a.event_id for a in auth_events.values()]) - # 5. If type is m.room.membership if event.type == EventTypes.Member: - _is_membership_change_allowed(room_version_obj, event, auth_events) + _is_membership_change_allowed(room_version_obj, event, auth_dict) logger.debug("Allowing! %s", event) return - _check_event_sender_in_room(event, auth_events) + _check_event_sender_in_room(event, auth_dict) # Special case to allow m.room.third_party_invite events wherever # a user is allowed to issue invites. Fixes # https://github.com/vector-im/vector-web/issues/1208 hopefully if event.type == EventTypes.ThirdPartyInvite: - user_level = get_user_power_level(event.user_id, auth_events) - invite_level = get_named_level(auth_events, "invite", 0) + user_level = get_user_power_level(event.user_id, auth_dict) + invite_level = get_named_level(auth_dict, "invite", 0) if user_level < invite_level: raise AuthError(403, "You don't have permission to invite users") @@ -241,20 +238,20 @@ def check_auth_rules_for_event( logger.debug("Allowing! %s", event) return - _can_send_event(event, auth_events) + _can_send_event(event, auth_dict) if event.type == EventTypes.PowerLevels: - _check_power_levels(room_version_obj, event, auth_events) + _check_power_levels(room_version_obj, event, auth_dict) if event.type == EventTypes.Redaction: - check_redaction(room_version_obj, event, auth_events) + check_redaction(room_version_obj, event, auth_dict) if ( event.type == EventTypes.MSC2716_INSERTION or event.type == EventTypes.MSC2716_BATCH or event.type == EventTypes.MSC2716_MARKER ): - check_historical(room_version_obj, event, auth_events) + check_historical(room_version_obj, event, auth_dict) logger.debug("Allowing! %s", event) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 49190459c8..157669ea88 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -348,12 +348,16 @@ class EventBase(metaclass=abc.ABCMeta): return self.__repr__() def __repr__(self): - return "<%s event_id=%r, type=%r, state_key=%r, outlier=%s>" % ( - self.__class__.__name__, - self.event_id, - self.get("type", None), - self.get("state_key", None), - self.internal_metadata.is_outlier(), + rejection = f"REJECTED={self.rejected_reason}, " if self.rejected_reason else "" + + return ( + f"<{self.__class__.__name__} " + f"{rejection}" + f"event_id={self.event_id}, " + f"type={self.get('type')}, " + f"state_key={self.get('state_key')}, " + f"outlier={self.internal_metadata.is_outlier()}" + ">" ) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 50f2a4c1f4..4f409f31e1 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -90,13 +90,13 @@ class EventBuilder: ) @property - def state_key(self): + def state_key(self) -> str: if self._state_key is not None: return self._state_key raise AttributeError("state_key") - def is_state(self): + def is_state(self) -> bool: return self._state_key is not None async def build( diff --git a/synapse/events/presence_router.py b/synapse/events/presence_router.py index 68b8b19024..a58f313e8b 100644 --- a/synapse/events/presence_router.py +++ b/synapse/events/presence_router.py @@ -14,6 +14,7 @@ import logging from typing import ( TYPE_CHECKING, + Any, Awaitable, Callable, Dict, @@ -33,14 +34,13 @@ if TYPE_CHECKING: GET_USERS_FOR_STATES_CALLBACK = Callable[ [Iterable[UserPresenceState]], Awaitable[Dict[str, Set[UserPresenceState]]] ] -GET_INTERESTED_USERS_CALLBACK = Callable[ - [str], Awaitable[Union[Set[str], "PresenceRouter.ALL_USERS"]] -] +# This must either return a set of strings or the constant PresenceRouter.ALL_USERS. +GET_INTERESTED_USERS_CALLBACK = Callable[[str], Awaitable[Union[Set[str], str]]] logger = logging.getLogger(__name__) -def load_legacy_presence_router(hs: "HomeServer"): +def load_legacy_presence_router(hs: "HomeServer") -> None: """Wrapper that loads a presence router module configured using the old configuration, and registers the hooks they implement. """ @@ -69,9 +69,10 @@ def load_legacy_presence_router(hs: "HomeServer"): if f is None: return None - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # f is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None return maybe_awaitable(f(*args, **kwargs)) @@ -104,7 +105,7 @@ class PresenceRouter: self, get_users_for_states: Optional[GET_USERS_FOR_STATES_CALLBACK] = None, get_interested_users: Optional[GET_INTERESTED_USERS_CALLBACK] = None, - ): + ) -> None: # PresenceRouter modules are required to implement both of these methods # or neither of them as they are assumed to act in a complementary manner paired_methods = [get_users_for_states, get_interested_users] @@ -142,7 +143,7 @@ class PresenceRouter: # Don't include any extra destinations for presence updates return {} - users_for_states = {} + users_for_states: Dict[str, Set[UserPresenceState]] = {} # run all the callbacks for get_users_for_states and combine the results for callback in self._get_users_for_states_callbacks: try: @@ -171,7 +172,7 @@ class PresenceRouter: return users_for_states - async def get_interested_users(self, user_id: str) -> Union[Set[str], ALL_USERS]: + async def get_interested_users(self, user_id: str) -> Union[Set[str], str]: """ Retrieve a list of users that `user_id` is interested in receiving the presence of. This will be in addition to those they share a room with. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 5ba01eeef9..d7527008c4 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,17 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union import attr from frozendict import frozendict +from twisted.internet.defer import Deferred + from synapse.appservice import ApplicationService from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background -from synapse.types import StateMap +from synapse.types import JsonDict, StateMap if TYPE_CHECKING: + from synapse.storage import Storage from synapse.storage.databases.main import DataStore @@ -112,13 +115,13 @@ class EventContext: @staticmethod def with_state( - state_group, - state_group_before_event, - current_state_ids, - prev_state_ids, - prev_group=None, - delta_ids=None, - ): + state_group: Optional[int], + state_group_before_event: Optional[int], + current_state_ids: Optional[StateMap[str]], + prev_state_ids: Optional[StateMap[str]], + prev_group: Optional[int] = None, + delta_ids: Optional[StateMap[str]] = None, + ) -> "EventContext": return EventContext( current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, @@ -129,22 +132,22 @@ class EventContext: ) @staticmethod - def for_outlier(): + def for_outlier() -> "EventContext": """Return an EventContext instance suitable for persisting an outlier event""" return EventContext( current_state_ids={}, prev_state_ids={}, ) - async def serialize(self, event: EventBase, store: "DataStore") -> dict: + async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` Args: - event (FrozenEvent): The event that this context relates to + event: The event that this context relates to Returns: - dict + The serialized event. """ # We don't serialize the full state dicts, instead they get pulled out @@ -170,17 +173,16 @@ class EventContext: } @staticmethod - def deserialize(storage, input): + def deserialize(storage: "Storage", input: JsonDict) -> "EventContext": """Converts a dict that was produced by `serialize` back into a EventContext. Args: - storage (Storage): Used to convert AS ID to AS object and fetch - state. - input (dict): A dict produced by `serialize` + storage: Used to convert AS ID to AS object and fetch state. + input: A dict produced by `serialize` Returns: - EventContext + The event context. """ context = _AsyncEventContextImpl( # We use the state_group and prev_state_id stuff to pull the @@ -241,22 +243,25 @@ class EventContext: await self._ensure_fetched() return self._current_state_ids - async def get_prev_state_ids(self): + async def get_prev_state_ids(self) -> StateMap[str]: """ Gets the room state map, excluding this event. For a non-state event, this will be the same as get_current_state_ids(). Returns: - dict[(str, str), str]|None: Returns None if state_group - is None, which happens when the associated event is an outlier. - Maps a (type, state_key) to the event ID of the state event matching - this tuple. + Returns {} if state_group is None, which happens when the associated + event is an outlier. + + Maps a (type, state_key) to the event ID of the state event matching + this tuple. """ await self._ensure_fetched() + # There *should* be previous state IDs now. + assert self._prev_state_ids is not None return self._prev_state_ids - def get_cached_current_state_ids(self): + def get_cached_current_state_ids(self) -> Optional[StateMap[str]]: """Gets the current state IDs if we have them already cached. It is an error to access this for a rejected event, since rejected state should @@ -264,16 +269,17 @@ class EventContext: ``rejected`` is set. Returns: - dict[(str, str), str]|None: Returns None if we haven't cached the - state or if state_group is None, which happens when the associated - event is an outlier. + Returns None if we haven't cached the state or if state_group is None + (which happens when the associated event is an outlier). + + Otherwise, returns the the current state IDs. """ if self.rejected: raise RuntimeError("Attempt to access state_ids of rejected event") return self._current_state_ids - async def _ensure_fetched(self): + async def _ensure_fetched(self) -> None: return None @@ -285,46 +291,46 @@ class _AsyncEventContextImpl(EventContext): Attributes: - _storage (Storage) + _storage - _fetching_state_deferred (Deferred|None): Resolves when *_state_ids have - been calculated. None if we haven't started calculating yet + _fetching_state_deferred: Resolves when *_state_ids have been calculated. + None if we haven't started calculating yet - _event_type (str): The type of the event the context is associated with. + _event_type: The type of the event the context is associated with. - _event_state_key (str): The state_key of the event the context is - associated with. + _event_state_key: The state_key of the event the context is associated with. - _prev_state_id (str|None): If the event associated with the context is - a state event, then `_prev_state_id` is the event_id of the state - that was replaced. + _prev_state_id: If the event associated with the context is a state event, + then `_prev_state_id` is the event_id of the state that was replaced. """ # This needs to have a default as we're inheriting - _storage = attr.ib(default=None) - _prev_state_id = attr.ib(default=None) - _event_type = attr.ib(default=None) - _event_state_key = attr.ib(default=None) - _fetching_state_deferred = attr.ib(default=None) + _storage: "Storage" = attr.ib(default=None) + _prev_state_id: Optional[str] = attr.ib(default=None) + _event_type: str = attr.ib(default=None) + _event_state_key: Optional[str] = attr.ib(default=None) + _fetching_state_deferred: Optional["Deferred[None]"] = attr.ib(default=None) - async def _ensure_fetched(self): + async def _ensure_fetched(self) -> None: if not self._fetching_state_deferred: self._fetching_state_deferred = run_in_background(self._fill_out_state) - return await make_deferred_yieldable(self._fetching_state_deferred) + await make_deferred_yieldable(self._fetching_state_deferred) - async def _fill_out_state(self): + async def _fill_out_state(self) -> None: """Called to populate the _current_state_ids and _prev_state_ids attributes by loading from the database. """ if self.state_group is None: return - self._current_state_ids = await self._storage.state.get_state_ids_for_group( + current_state_ids = await self._storage.state.get_state_ids_for_group( self.state_group ) + # Set this separately so mypy knows current_state_ids is not None. + self._current_state_ids = current_state_ids if self._event_state_key is not None: - self._prev_state_ids = dict(self._current_state_ids) + self._prev_state_ids = dict(current_state_ids) key = (self._event_type, self._event_state_key) if self._prev_state_id: @@ -332,10 +338,12 @@ class _AsyncEventContextImpl(EventContext): else: self._prev_state_ids.pop(key, None) else: - self._prev_state_ids = self._current_state_ids + self._prev_state_ids = current_state_ids -def _encode_state_dict(state_dict): +def _encode_state_dict( + state_dict: Optional[StateMap[str]], +) -> Optional[List[Tuple[str, str, str]]]: """Since dicts of (type, state_key) -> event_id cannot be serialized in JSON we need to convert them to a form that can. """ @@ -345,7 +353,9 @@ def _encode_state_dict(state_dict): return [(etype, state_key, v) for (etype, state_key), v in state_dict.items()] -def _decode_state_dict(input): +def _decode_state_dict( + input: Optional[List[Tuple[str, str, str]]] +) -> Optional[StateMap[str]]: """Decodes a state dict encoded using `_encode_state_dict` above""" if input is None: return None diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index ae4c8ab257..3134beb8d3 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -77,7 +77,7 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ ] -def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): +def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None: """Wrapper that loads spam checkers configured using the old configuration, and registers the spam checker hooks they implement. """ @@ -129,9 +129,9 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): request_info: Collection[Tuple[str, str]], auth_provider_id: Optional[str], ) -> Union[Awaitable[RegistrationBehaviour], RegistrationBehaviour]: - # We've already made sure f is not None above, but mypy doesn't - # do well across function boundaries so we need to tell it f is - # definitely not None. + # Assertion required because mypy can't prove we won't + # change `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None return f( @@ -146,9 +146,10 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): "Bad signature for callback check_registration_for_spam", ) - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # wrapped_func is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert wrapped_func is not None return maybe_awaitable(wrapped_func(*args, **kwargs)) @@ -165,7 +166,7 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): class SpamChecker: - def __init__(self): + def __init__(self) -> None: self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = [] self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] @@ -209,7 +210,7 @@ class SpamChecker: CHECK_REGISTRATION_FOR_SPAM_CALLBACK ] = None, check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None, - ): + ) -> None: """Register callbacks from module for each hook.""" if check_event_for_spam is not None: self._check_event_for_spam_callbacks.append(check_event_for_spam) @@ -275,7 +276,9 @@ class SpamChecker: return False - async def user_may_join_room(self, user_id: str, room_id: str, is_invited: bool): + async def user_may_join_room( + self, user_id: str, room_id: str, is_invited: bool + ) -> bool: """Checks if a given users is allowed to join a room. Not called when a user creates a room. @@ -285,7 +288,7 @@ class SpamChecker: is_invited: Whether the user is invited into the room Returns: - bool: Whether the user may join the room + Whether the user may join the room """ for callback in self._user_may_join_room_callbacks: if await callback(user_id, room_id, is_invited) is False: diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 976d9fa446..2a6dabdab6 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Awaitable, Callable, List, Optional, Tuple from synapse.api.errors import SynapseError from synapse.events import EventBase @@ -38,7 +38,7 @@ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[ ] -def load_legacy_third_party_event_rules(hs: "HomeServer"): +def load_legacy_third_party_event_rules(hs: "HomeServer") -> None: """Wrapper that loads a third party event rules module configured using the old configuration, and registers the hooks they implement. """ @@ -77,9 +77,9 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): event: EventBase, state_events: StateMap[EventBase], ) -> Tuple[bool, Optional[dict]]: - # We've already made sure f is not None above, but mypy doesn't do well - # across function boundaries so we need to tell it f is definitely not - # None. + # Assertion required because mypy can't prove we won't change + # `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None res = await f(event, state_events) @@ -98,9 +98,9 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): async def wrap_on_create_room( requester: Requester, config: dict, is_requester_admin: bool ) -> None: - # We've already made sure f is not None above, but mypy doesn't do well - # across function boundaries so we need to tell it f is definitely not - # None. + # Assertion required because mypy can't prove we won't change + # `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None res = await f(requester, config, is_requester_admin) @@ -112,9 +112,10 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): return wrap_on_create_room - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # f is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None return maybe_awaitable(f(*args, **kwargs)) @@ -162,7 +163,7 @@ class ThirdPartyEventRules: check_visibility_can_be_modified: Optional[ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK ] = None, - ): + ) -> None: """Register callbacks from modules for each hook.""" if check_event_allowed is not None: self._check_event_allowed_callbacks.append(check_event_allowed) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 520edbbf61..3f3eba86a8 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -13,18 +13,32 @@ # limitations under the License. import collections.abc import re -from typing import Any, Mapping, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + Optional, + Union, +) from frozendict import frozendict from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersion +from synapse.types import JsonDict from synapse.util.async_helpers import yieldable_gather_results from synapse.util.frozenutils import unfreeze from . import EventBase +if TYPE_CHECKING: + from synapse.server import HomeServer + # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' # (?<!stuff) matches if the current position in the string is not preceded # by a match for 'stuff'. @@ -65,7 +79,7 @@ def prune_event(event: EventBase) -> EventBase: return pruned_event -def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: +def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict: """Redacts the event_dict in the same way as `prune_event`, except it operates on dicts rather than event objects @@ -97,7 +111,7 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: new_content = {} - def add_fields(*fields): + def add_fields(*fields: str) -> None: for field in fields: if field in event_dict["content"]: new_content[field] = event_dict["content"][field] @@ -151,7 +165,7 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: allowed_fields["content"] = new_content - unsigned = {} + unsigned: JsonDict = {} allowed_fields["unsigned"] = unsigned event_unsigned = event_dict.get("unsigned", {}) @@ -164,16 +178,16 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: return allowed_fields -def _copy_field(src, dst, field): +def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None: """Copy the field in 'src' to 'dst'. For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"] then dst={"foo":{"bar":5}}. Args: - src(dict): The dict to read from. - dst(dict): The dict to modify. - field(list<str>): List of keys to drill down to in 'src'. + src: The dict to read from. + dst: The dict to modify. + field: List of keys to drill down to in 'src'. """ if len(field) == 0: # this should be impossible return @@ -205,7 +219,7 @@ def _copy_field(src, dst, field): sub_out_dict[key_to_move] = sub_dict[key_to_move] -def only_fields(dictionary, fields): +def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: """Return a new dict with only the fields in 'dictionary' which are present in 'fields'. @@ -215,11 +229,11 @@ def only_fields(dictionary, fields): A literal '.' character in a field name may be escaped using a '\'. Args: - dictionary(dict): The dictionary to read from. - fields(list<str>): A list of fields to copy over. Only shallow refs are + dictionary: The dictionary to read from. + fields: A list of fields to copy over. Only shallow refs are taken. Returns: - dict: A new dictionary with only the given fields. If fields was empty, + A new dictionary with only the given fields. If fields was empty, the same dictionary is returned. """ if len(fields) == 0: @@ -235,17 +249,17 @@ def only_fields(dictionary, fields): [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields ] - output = {} + output: JsonDict = {} for field_array in split_fields: _copy_field(dictionary, output, field_array) return output -def format_event_raw(d): +def format_event_raw(d: JsonDict) -> JsonDict: return d -def format_event_for_client_v1(d): +def format_event_for_client_v1(d: JsonDict) -> JsonDict: d = format_event_for_client_v2(d) sender = d.get("sender") @@ -267,7 +281,7 @@ def format_event_for_client_v1(d): return d -def format_event_for_client_v2(d): +def format_event_for_client_v2(d: JsonDict) -> JsonDict: drop_keys = ( "auth_events", "prev_events", @@ -282,37 +296,37 @@ def format_event_for_client_v2(d): return d -def format_event_for_client_v2_without_room_id(d): +def format_event_for_client_v2_without_room_id(d: JsonDict) -> JsonDict: d = format_event_for_client_v2(d) d.pop("room_id", None) return d def serialize_event( - e, - time_now_ms, - as_client_event=True, - event_format=format_event_for_client_v1, - token_id=None, - only_event_fields=None, - include_stripped_room_state=False, -): + e: Union[JsonDict, EventBase], + time_now_ms: int, + as_client_event: bool = True, + event_format: Callable[[JsonDict], JsonDict] = format_event_for_client_v1, + token_id: Optional[str] = None, + only_event_fields: Optional[List[str]] = None, + include_stripped_room_state: bool = False, +) -> JsonDict: """Serialize event for clients Args: - e (EventBase) - time_now_ms (int) - as_client_event (bool) + e + time_now_ms + as_client_event event_format token_id only_event_fields - include_stripped_room_state (bool): Some events can have stripped room state + include_stripped_room_state: Some events can have stripped room state stored in the `unsigned` field. This is required for invite and knock functionality. If this option is False, that state will be removed from the event before it is returned. Otherwise, it will be kept. Returns: - dict + The serialized event dictionary. """ # FIXME(erikj): To handle the case of presence events and the like @@ -369,25 +383,27 @@ class EventClientSerializer: clients. """ - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() - self.experimental_msc1849_support_enabled = ( - hs.config.server.experimental_msc1849_support_enabled - ) + self._msc1849_enabled = hs.config.experimental.msc1849_enabled async def serialize_event( - self, event, time_now, bundle_aggregations=True, **kwargs - ): + self, + event: Union[JsonDict, EventBase], + time_now: int, + bundle_aggregations: bool = True, + **kwargs: Any, + ) -> JsonDict: """Serializes a single event. Args: - event (EventBase) - time_now (int): The current time in milliseconds - bundle_aggregations (bool): Whether to bundle in related events + event + time_now: The current time in milliseconds + bundle_aggregations: Whether to bundle in related events **kwargs: Arguments to pass to `serialize_event` Returns: - dict: The serialized event + The serialized event """ # To handle the case of presence events and the like if not isinstance(event, EventBase): @@ -400,7 +416,7 @@ class EventClientSerializer: # we need to bundle in with the event. # Do not bundle relations if the event has been redacted if not event.internal_metadata.is_redacted() and ( - self.experimental_msc1849_support_enabled and bundle_aggregations + self._msc1849_enabled and bundle_aggregations ): annotations = await self.store.get_aggregation_groups_for_event(event_id) references = await self.store.get_relations_for_event( @@ -448,25 +464,27 @@ class EventClientSerializer: return serialized_event - def serialize_events(self, events, time_now, **kwargs): + async def serialize_events( + self, events: Iterable[Union[JsonDict, EventBase]], time_now: int, **kwargs: Any + ) -> List[JsonDict]: """Serializes multiple events. Args: - event (iter[EventBase]) - time_now (int): The current time in milliseconds + event + time_now: The current time in milliseconds **kwargs: Arguments to pass to `serialize_event` Returns: - Deferred[list[dict]]: The list of serialized events + The list of serialized events """ - return yieldable_gather_results( + return await yieldable_gather_results( self.serialize_event, events, time_now=time_now, **kwargs ) def copy_power_levels_contents( old_power_levels: Mapping[str, Union[int, Mapping[str, int]]] -): +) -> Dict[str, Union[int, Dict[str, int]]]: """Copy the content of a power_levels event, unfreezing frozendicts along the way Raises: @@ -475,7 +493,7 @@ def copy_power_levels_contents( if not isinstance(old_power_levels, collections.abc.Mapping): raise TypeError("Not a valid power-levels content: %r" % (old_power_levels,)) - power_levels = {} + power_levels: Dict[str, Union[int, Dict[str, int]]] = {} for k, v in old_power_levels.items(): if isinstance(v, int): @@ -483,7 +501,8 @@ def copy_power_levels_contents( continue if isinstance(v, collections.abc.Mapping): - power_levels[k] = h = {} + h: Dict[str, int] = {} + power_levels[k] = h for k1, v1 in v.items(): # we should only have one level of nesting if not isinstance(v1, int): @@ -498,7 +517,7 @@ def copy_power_levels_contents( return power_levels -def validate_canonicaljson(value: Any): +def validate_canonicaljson(value: Any) -> None: """ Ensure that the JSON object is valid according to the rules of canonical JSON. diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 6eb6544c4c..4d459c17f1 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections.abc -from typing import Union +from typing import Iterable, Union import jsonschema @@ -28,11 +28,11 @@ from synapse.events.utils import ( validate_canonicaljson, ) from synapse.federation.federation_server import server_matches_acl_event -from synapse.types import EventID, RoomID, UserID +from synapse.types import EventID, JsonDict, RoomID, UserID class EventValidator: - def validate_new(self, event: EventBase, config: HomeServerConfig): + def validate_new(self, event: EventBase, config: HomeServerConfig) -> None: """Validates the event has roughly the right format Args: @@ -116,7 +116,7 @@ class EventValidator: errcode=Codes.BAD_JSON, ) - def _validate_retention(self, event: EventBase): + def _validate_retention(self, event: EventBase) -> None: """Checks that an event that defines the retention policy for a room respects the format enforced by the spec. @@ -156,7 +156,7 @@ class EventValidator: errcode=Codes.BAD_JSON, ) - def validate_builder(self, event: Union[EventBase, EventBuilder]): + def validate_builder(self, event: Union[EventBase, EventBuilder]) -> None: """Validates that the builder/event has roughly the right format. Only checks values that we expect a proto event to have, rather than all the fields an event would have @@ -204,14 +204,14 @@ class EventValidator: self._ensure_state_event(event) - def _ensure_strings(self, d, keys): + def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None: for s in keys: if s not in d: raise SynapseError(400, "'%s' not in content" % (s,)) if not isinstance(d[s], str): raise SynapseError(400, "'%s' not a string type" % (s,)) - def _ensure_state_event(self, event): + def _ensure_state_event(self, event: Union[EventBase, EventBuilder]) -> None: if not event.is_state(): raise SynapseError(400, "'%s' must be state events" % (event.type,)) @@ -244,7 +244,9 @@ POWER_LEVELS_SCHEMA = { } -def _create_power_level_validator(): +# This could return something newer than Draft 7, but that's the current "latest" +# validator. +def _create_power_level_validator() -> jsonschema.Draft7Validator: validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA) # by default jsonschema does not consider a frozendict to be an object so diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f4612a5b92..ebe75a9e9b 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -200,46 +200,13 @@ class AuthHandler: self.bcrypt_rounds = hs.config.registration.bcrypt_rounds - # we can't use hs.get_module_api() here, because to do so will create an - # import loop. - # - # TODO: refactor this class to separate the lower-level stuff that - # ModuleApi can use from the higher-level stuff that uses ModuleApi, as - # better way to break the loop - account_handler = ModuleApi(hs, self) - - self.password_providers = [ - PasswordProvider.load(module, config, account_handler) - for module, config in hs.config.authproviders.password_providers - ] - - logger.info("Extra password_providers: %s", self.password_providers) + self.password_auth_provider = hs.get_password_auth_provider() self.hs = hs # FIXME better possibility to access registrationHandler later? self.macaroon_gen = hs.get_macaroon_generator() self._password_enabled = hs.config.auth.password_enabled self._password_localdb_enabled = hs.config.auth.password_localdb_enabled - # start out by assuming PASSWORD is enabled; we will remove it later if not. - login_types = set() - if self._password_localdb_enabled: - login_types.add(LoginType.PASSWORD) - - for provider in self.password_providers: - login_types.update(provider.get_supported_login_types().keys()) - - if not self._password_enabled: - login_types.discard(LoginType.PASSWORD) - - # Some clients just pick the first type in the list. In this case, we want - # them to use PASSWORD (rather than token or whatever), so we want to make sure - # that comes first, where it's present. - self._supported_login_types = [] - if LoginType.PASSWORD in login_types: - self._supported_login_types.append(LoginType.PASSWORD) - login_types.remove(LoginType.PASSWORD) - self._supported_login_types.extend(login_types) - # Ratelimiter for failed auth during UIA. Uses same ratelimit config # as per `rc_login.failed_attempts`. self._failed_uia_attempts_ratelimiter = Ratelimiter( @@ -427,11 +394,10 @@ class AuthHandler: ui_auth_types.add(LoginType.PASSWORD) # also allow auth from password providers - for provider in self.password_providers: - for t in provider.get_supported_login_types().keys(): - if t == LoginType.PASSWORD and not self._password_enabled: - continue - ui_auth_types.add(t) + for t in self.password_auth_provider.get_supported_login_types().keys(): + if t == LoginType.PASSWORD and not self._password_enabled: + continue + ui_auth_types.add(t) # if sso is enabled, allow the user to log in via SSO iff they have a mapping # from sso to mxid. @@ -1038,7 +1004,25 @@ class AuthHandler: Returns: login types """ - return self._supported_login_types + # Load any login types registered by modules + # This is stored in the password_auth_provider so this doesn't trigger + # any callbacks + types = list(self.password_auth_provider.get_supported_login_types().keys()) + + # This list should include PASSWORD if (either _password_localdb_enabled is + # true or if one of the modules registered it) AND _password_enabled is true + # Also: + # Some clients just pick the first type in the list. In this case, we want + # them to use PASSWORD (rather than token or whatever), so we want to make sure + # that comes first, where it's present. + if LoginType.PASSWORD in types: + types.remove(LoginType.PASSWORD) + if self._password_enabled: + types.insert(0, LoginType.PASSWORD) + elif self._password_localdb_enabled and self._password_enabled: + types.insert(0, LoginType.PASSWORD) + + return types async def validate_login( self, @@ -1217,15 +1201,20 @@ class AuthHandler: known_login_type = False - for provider in self.password_providers: - supported_login_types = provider.get_supported_login_types() - if login_type not in supported_login_types: - # this password provider doesn't understand this login type - continue - + # Check if login_type matches a type registered by one of the modules + # We don't need to remove LoginType.PASSWORD from the list if password login is + # disabled, since if that were the case then by this point we know that the + # login_type is not LoginType.PASSWORD + supported_login_types = self.password_auth_provider.get_supported_login_types() + # check if the login type being used is supported by a module + if login_type in supported_login_types: + # Make a note that this login type is supported by the server known_login_type = True + # Get all the fields expected for this login types login_fields = supported_login_types[login_type] + # go through the login submission and keep track of which required fields are + # provided/not provided missing_fields = [] login_dict = {} for f in login_fields: @@ -1233,6 +1222,7 @@ class AuthHandler: missing_fields.append(f) else: login_dict[f] = login_submission[f] + # raise an error if any of the expected fields for that login type weren't provided if missing_fields: raise SynapseError( 400, @@ -1240,10 +1230,15 @@ class AuthHandler: % (login_type, missing_fields), ) - result = await provider.check_auth(username, login_type, login_dict) + # call all of the check_auth hooks for that login_type + # it will return a result once the first success is found (or None otherwise) + result = await self.password_auth_provider.check_auth( + username, login_type, login_dict + ) if result: return result + # if no module managed to authenticate the user, then fallback to built in password based auth if login_type == LoginType.PASSWORD and self._password_localdb_enabled: known_login_type = True @@ -1282,11 +1277,16 @@ class AuthHandler: completed login/registration, or `None`. If authentication was unsuccessful, `user_id` and `callback` are both `None`. """ - for provider in self.password_providers: - result = await provider.check_3pid_auth(medium, address, password) - if result: - return result + # call all of the check_3pid_auth callbacks + # Result will be from the first callback that returns something other than None + # If all the callbacks return None, then result is also set to None + result = await self.password_auth_provider.check_3pid_auth( + medium, address, password + ) + if result: + return result + # if result is None then return (None, None) return None, None async def _check_local_password(self, user_id: str, password: str) -> Optional[str]: @@ -1365,13 +1365,12 @@ class AuthHandler: user_info = await self.auth.get_user_by_access_token(access_token) await self.store.delete_access_token(access_token) - # see if any of our auth providers want to know about this - for provider in self.password_providers: - await provider.on_logged_out( - user_id=user_info.user_id, - device_id=user_info.device_id, - access_token=access_token, - ) + # see if any modules want to know about this + await self.password_auth_provider.on_logged_out( + user_id=user_info.user_id, + device_id=user_info.device_id, + access_token=access_token, + ) # delete pushers associated with this access token if user_info.token_id is not None: @@ -1398,12 +1397,11 @@ class AuthHandler: user_id, except_token_id=except_token_id, device_id=device_id ) - # see if any of our auth providers want to know about this - for provider in self.password_providers: - for token, _, device_id in tokens_and_devices: - await provider.on_logged_out( - user_id=user_id, device_id=device_id, access_token=token - ) + # see if any modules want to know about this + for token, _, device_id in tokens_and_devices: + await self.password_auth_provider.on_logged_out( + user_id=user_id, device_id=device_id, access_token=token + ) # delete pushers associated with the access tokens await self.hs.get_pusherpool().remove_pushers_by_access_token( @@ -1811,40 +1809,228 @@ class MacaroonGenerator: return macaroon -class PasswordProvider: - """Wrapper for a password auth provider module +def load_legacy_password_auth_providers(hs: "HomeServer") -> None: + module_api = hs.get_module_api() + for module, config in hs.config.authproviders.password_providers: + load_single_legacy_password_auth_provider( + module=module, config=config, api=module_api + ) - This class abstracts out all of the backwards-compatibility hacks for - password providers, to provide a consistent interface. - """ - @classmethod - def load( - cls, module: Type, config: JsonDict, module_api: ModuleApi - ) -> "PasswordProvider": - try: - pp = module(config=config, account_handler=module_api) - except Exception as e: - logger.error("Error while initializing %r: %s", module, e) - raise - return cls(pp, module_api) +def load_single_legacy_password_auth_provider( + module: Type, config: JsonDict, api: ModuleApi +) -> None: + try: + provider = module(config=config, account_handler=api) + except Exception as e: + logger.error("Error while initializing %r: %s", module, e) + raise + + # The known hooks. If a module implements a method who's name appears in this set + # we'll want to register it + password_auth_provider_methods = { + "check_3pid_auth", + "on_logged_out", + } + + # All methods that the module provides should be async, but this wasn't enforced + # in the old module system, so we wrap them if needed + def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]: + # f might be None if the callback isn't implemented by the module. In this + # case we don't want to register a callback at all so we return None. + if f is None: + return None + + # We need to wrap check_password because its old form would return a boolean + # but we now want it to behave just like check_auth() and return the matrix id of + # the user if authentication succeeded or None otherwise + if f.__name__ == "check_password": + + async def wrapped_check_password( + username: str, login_type: str, login_dict: JsonDict + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + matrix_user_id = api.get_qualified_user_id(username) + password = login_dict["password"] + + is_valid = await f(matrix_user_id, password) + + if is_valid: + return matrix_user_id, None + + return None - def __init__(self, pp: "PasswordProvider", module_api: ModuleApi): - self._pp = pp - self._module_api = module_api + return wrapped_check_password + + # We need to wrap check_auth as in the old form it could return + # just a str, but now it must return Optional[Tuple[str, Optional[Callable]] + if f.__name__ == "check_auth": + + async def wrapped_check_auth( + username: str, login_type: str, login_dict: JsonDict + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + result = await f(username, login_type, login_dict) + + if isinstance(result, str): + return result, None + + return result + + return wrapped_check_auth + + # We need to wrap check_3pid_auth as in the old form it could return + # just a str, but now it must return Optional[Tuple[str, Optional[Callable]] + if f.__name__ == "check_3pid_auth": + + async def wrapped_check_3pid_auth( + medium: str, address: str, password: str + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + result = await f(medium, address, password) + + if isinstance(result, str): + return result, None + + return result - self._supported_login_types = {} + return wrapped_check_3pid_auth - # grandfather in check_password support - if hasattr(self._pp, "check_password"): - self._supported_login_types[LoginType.PASSWORD] = ("password",) + def run(*args: Tuple, **kwargs: Dict) -> Awaitable: + # mypy doesn't do well across function boundaries so we need to tell it + # f is definitely not None. + assert f is not None - g = getattr(self._pp, "get_supported_login_types", None) - if g: - self._supported_login_types.update(g()) + return maybe_awaitable(f(*args, **kwargs)) - def __str__(self) -> str: - return str(self._pp) + return run + + # populate hooks with the implemented methods, wrapped with async_wrapper + hooks = { + hook: async_wrapper(getattr(provider, hook, None)) + for hook in password_auth_provider_methods + } + + supported_login_types = {} + # call get_supported_login_types and add that to the dict + g = getattr(provider, "get_supported_login_types", None) + if g is not None: + # Note the old module style also called get_supported_login_types at loading time + # and it is synchronous + supported_login_types.update(g()) + + auth_checkers = {} + # Legacy modules have a check_auth method which expects to be called with one of + # the keys returned by get_supported_login_types. New style modules register a + # dictionary of login_type->check_auth_method mappings + check_auth = async_wrapper(getattr(provider, "check_auth", None)) + if check_auth is not None: + for login_type, fields in supported_login_types.items(): + # need tuple(fields) since fields can be any Iterable type (so may not be hashable) + auth_checkers[(login_type, tuple(fields))] = check_auth + + # if it has a "check_password" method then it should handle all auth checks + # with login type of LoginType.PASSWORD + check_password = async_wrapper(getattr(provider, "check_password", None)) + if check_password is not None: + # need to use a tuple here for ("password",) not a list since lists aren't hashable + auth_checkers[(LoginType.PASSWORD, ("password",))] = check_password + + api.register_password_auth_provider_callbacks(hooks, auth_checkers=auth_checkers) + + +CHECK_3PID_AUTH_CALLBACK = Callable[ + [str, str, str], + Awaitable[ + Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]] + ], +] +ON_LOGGED_OUT_CALLBACK = Callable[[str, Optional[str], str], Awaitable] +CHECK_AUTH_CALLBACK = Callable[ + [str, str, JsonDict], + Awaitable[ + Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]] + ], +] + + +class PasswordAuthProvider: + """ + A class that the AuthHandler calls when authenticating users + It allows modules to provide alternative methods for authentication + """ + + def __init__(self) -> None: + # lists of callbacks + self.check_3pid_auth_callbacks: List[CHECK_3PID_AUTH_CALLBACK] = [] + self.on_logged_out_callbacks: List[ON_LOGGED_OUT_CALLBACK] = [] + + # Mapping from login type to login parameters + self._supported_login_types: Dict[str, Iterable[str]] = {} + + # Mapping from login type to auth checker callbacks + self.auth_checker_callbacks: Dict[str, List[CHECK_AUTH_CALLBACK]] = {} + + def register_password_auth_provider_callbacks( + self, + check_3pid_auth: Optional[CHECK_3PID_AUTH_CALLBACK] = None, + on_logged_out: Optional[ON_LOGGED_OUT_CALLBACK] = None, + auth_checkers: Optional[Dict[Tuple[str, Tuple], CHECK_AUTH_CALLBACK]] = None, + ) -> None: + # Register check_3pid_auth callback + if check_3pid_auth is not None: + self.check_3pid_auth_callbacks.append(check_3pid_auth) + + # register on_logged_out callback + if on_logged_out is not None: + self.on_logged_out_callbacks.append(on_logged_out) + + if auth_checkers is not None: + # register a new supported login_type + # Iterate through all of the types being registered + for (login_type, fields), callback in auth_checkers.items(): + # Note: fields may be empty here. This would allow a modules auth checker to + # be called with just 'login_type' and no password or other secrets + + # Need to check that all the field names are strings or may get nasty errors later + for f in fields: + if not isinstance(f, str): + raise RuntimeError( + "A module tried to register support for login type: %s with parameters %s" + " but all parameter names must be strings" + % (login_type, fields) + ) + + # 2 modules supporting the same login type must expect the same fields + # e.g. 1 can't expect "pass" if the other expects "password" + # so throw an exception if that happens + if login_type not in self._supported_login_types.get(login_type, []): + self._supported_login_types[login_type] = fields + else: + fields_currently_supported = self._supported_login_types.get( + login_type + ) + if fields_currently_supported != fields: + raise RuntimeError( + "A module tried to register support for login type: %s with parameters %s" + " but another module had already registered support for that type with parameters %s" + % (login_type, fields, fields_currently_supported) + ) + + # Add the new method to the list of auth_checker_callbacks for this login type + self.auth_checker_callbacks.setdefault(login_type, []).append(callback) def get_supported_login_types(self) -> Mapping[str, Iterable[str]]: """Get the login types supported by this password provider @@ -1852,20 +2038,15 @@ class PasswordProvider: Returns a map from a login type identifier (such as m.login.password) to an iterable giving the fields which must be provided by the user in the submission to the /login API. - - This wrapper adds m.login.password to the list if the underlying password - provider supports the check_password() api. """ + return self._supported_login_types async def check_auth( self, username: str, login_type: str, login_dict: JsonDict - ) -> Optional[Tuple[str, Optional[Callable]]]: + ) -> Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]: """Check if the user has presented valid login credentials - This wrapper also calls check_password() if the underlying password provider - supports the check_password() api and the login type is m.login.password. - Args: username: user id presented by the client. Either an MXID or an unqualified username. @@ -1879,63 +2060,130 @@ class PasswordProvider: user, and `callback` is an optional callback which will be called with the result from the /login call (including access_token, device_id, etc.) """ - # first grandfather in a call to check_password - if login_type == LoginType.PASSWORD: - check_password = getattr(self._pp, "check_password", None) - if check_password: - qualified_user_id = self._module_api.get_qualified_user_id(username) - is_valid = await check_password( - qualified_user_id, login_dict["password"] - ) - if is_valid: - return qualified_user_id, None - check_auth = getattr(self._pp, "check_auth", None) - if not check_auth: - return None - result = await check_auth(username, login_type, login_dict) + # Go through all callbacks for the login type until one returns with a value + # other than None (i.e. until a callback returns a success) + for callback in self.auth_checker_callbacks[login_type]: + try: + result = await callback(username, login_type, login_dict) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue - # Check if the return value is a str or a tuple - if isinstance(result, str): - # If it's a str, set callback function to None - return result, None + if result is not None: + # Check that the callback returned a Tuple[str, Optional[Callable]] + # "type: ignore[unreachable]" is used after some isinstance checks because mypy thinks + # result is always the right type, but as it is 3rd party code it might not be + + if not isinstance(result, tuple) or len(result) != 2: + logger.warning( + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue - return result + # pull out the two parts of the tuple so we can do type checking + str_result, callback_result = result + + # the 1st item in the tuple should be a str + if not isinstance(str_result, str): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # the second should be Optional[Callable] + if callback_result is not None: + if not callable(callback_result): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # The result is a (str, Optional[callback]) tuple so return the successful result + return result + + # If this point has been reached then none of the callbacks successfully authenticated + # the user so return None + return None async def check_3pid_auth( self, medium: str, address: str, password: str - ) -> Optional[Tuple[str, Optional[Callable]]]: - g = getattr(self._pp, "check_3pid_auth", None) - if not g: - return None - + ) -> Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]: # This function is able to return a deferred that either # resolves None, meaning authentication failure, or upon # success, to a str (which is the user_id) or a tuple of # (user_id, callback_func), where callback_func should be run # after we've finished everything else - result = await g(medium, address, password) - # Check if the return value is a str or a tuple - if isinstance(result, str): - # If it's a str, set callback function to None - return result, None + for callback in self.check_3pid_auth_callbacks: + try: + result = await callback(medium, address, password) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue - return result + if result is not None: + # Check that the callback returned a Tuple[str, Optional[Callable]] + # "type: ignore[unreachable]" is used after some isinstance checks because mypy thinks + # result is always the right type, but as it is 3rd party code it might not be + + if not isinstance(result, tuple) or len(result) != 2: + logger.warning( + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # pull out the two parts of the tuple so we can do type checking + str_result, callback_result = result + + # the 1st item in the tuple should be a str + if not isinstance(str_result, str): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # the second should be Optional[Callable] + if callback_result is not None: + if not callable(callback_result): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # The result is a (str, Optional[callback]) tuple so return the successful result + return result + + # If this point has been reached then none of the callbacks successfully authenticated + # the user so return None + return None async def on_logged_out( self, user_id: str, device_id: Optional[str], access_token: str ) -> None: - g = getattr(self._pp, "on_logged_out", None) - if not g: - return - # This might return an awaitable, if it does block the log out - # until it completes. - await maybe_awaitable( - g( - user_id=user_id, - device_id=device_id, - access_token=access_token, - ) - ) + # call all of the on_logged_out callbacks + for callback in self.on_logged_out_callbacks: + try: + callback(user_id, device_id, access_token) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 75e6019760..6eafbea25d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,7 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, +) from synapse.api import errors from synapse.api.constants import EventTypes @@ -595,7 +606,7 @@ class DeviceHandler(DeviceWorkerHandler): def _update_device_from_client_ips( - device: JsonDict, client_ips: Dict[Tuple[str, str], JsonDict] + device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] ) -> None: ip = client_ips.get((device["user_id"], device["device_id"]), {}) device.update({"last_seen_ts": ip.get("last_seen"), "last_seen_ip": ip.get("ip")}) diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index d089c56286..365063ebdf 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -55,8 +55,7 @@ class EventAuthHandler: """Check an event passes the auth rules at its own auth events""" auth_event_ids = event.auth_event_ids() auth_events_by_id = await self._store.get_events(auth_event_ids) - auth_events = {(e.type, e.state_key): e for e in auth_events_by_id.values()} - check_auth_rules_for_event(room_version_obj, event, auth_events) + check_auth_rules_for_event(room_version_obj, event, auth_events_by_id.values()) def compute_auth_events( self, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3e341bd287..3112cc88b1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -15,7 +15,6 @@ """Contains handlers for federation events.""" -import itertools import logging from http import HTTPStatus from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union @@ -27,12 +26,7 @@ from unpaddedbase64 import decode_base64 from twisted.internet import defer from synapse import event_auth -from synapse.api.constants import ( - EventContentFields, - EventTypes, - Membership, - RejectedReason, -) +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, CodeMessageException, @@ -43,12 +37,9 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) -from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion, RoomVersions +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.crypto.event_signing import compute_event_signature -from synapse.event_auth import ( - check_auth_rules_for_event, - validate_event_for_room_version, -) +from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.events.validator import EventValidator @@ -238,18 +229,10 @@ class FederationHandler: ) return False - logger.debug( - "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s", - room_id, - current_depth, - max_depth, - sorted_extremeties_tuple, - ) - # We ignore extremities that have a greater depth than our current depth # as: # 1. we don't really care about getting events that have happened - # before our current position; and + # after our current position; and # 2. we have likely previously tried and failed to backfill from that # extremity, so to avoid getting "stuck" requesting the same # backfill repeatedly we drop those extremities. @@ -257,9 +240,19 @@ class FederationHandler: t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth ] + logger.debug( + "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s", + room_id, + current_depth, + limit, + max_depth, + sorted_extremeties_tuple, + filtered_sorted_extremeties_tuple, + ) + # However, we need to check that the filtered extremities are non-empty. # If they are empty then either we can a) bail or b) still attempt to - # backill. We opt to try backfilling anyway just in case we do get + # backfill. We opt to try backfilling anyway just in case we do get # relevant events. if filtered_sorted_extremeties_tuple: sorted_extremeties_tuple = filtered_sorted_extremeties_tuple @@ -389,7 +382,7 @@ class FederationHandler: for key, state_dict in states.items() } - for e_id, _ in sorted_extremeties_tuple: + for e_id in event_ids: likely_extremeties_domains = get_domains_from_state(states[e_id]) success = await try_backfill( @@ -517,7 +510,7 @@ class FederationHandler: auth_events=auth_chain, ) - max_stream_id = await self._persist_auth_tree( + max_stream_id = await self._federation_event_handler.process_remote_join( origin, room_id, auth_chain, state, event, room_version_obj ) @@ -1093,119 +1086,6 @@ class FederationHandler: else: return None - async def _persist_auth_tree( - self, - origin: str, - room_id: str, - auth_events: List[EventBase], - state: List[EventBase], - event: EventBase, - room_version: RoomVersion, - ) -> int: - """Checks the auth chain is valid (and passes auth checks) for the - state and event. Then persists the auth chain and state atomically. - Persists the event separately. Notifies about the persisted events - where appropriate. - - Will attempt to fetch missing auth events. - - Args: - origin: Where the events came from - room_id, - auth_events - state - event - room_version: The room version we expect this room to have, and - will raise if it doesn't match the version in the create event. - """ - events_to_context = {} - for e in itertools.chain(auth_events, state): - e.internal_metadata.outlier = True - events_to_context[e.event_id] = EventContext.for_outlier() - - event_map = { - e.event_id: e for e in itertools.chain(auth_events, state, [event]) - } - - create_event = None - for e in auth_events: - if (e.type, e.state_key) == (EventTypes.Create, ""): - create_event = e - break - - if create_event is None: - # If the state doesn't have a create event then the room is - # invalid, and it would fail auth checks anyway. - raise SynapseError(400, "No create event in state") - - room_version_id = create_event.content.get( - "room_version", RoomVersions.V1.identifier - ) - - if room_version.identifier != room_version_id: - raise SynapseError(400, "Room version mismatch") - - missing_auth_events = set() - for e in itertools.chain(auth_events, state, [event]): - for e_id in e.auth_event_ids(): - if e_id not in event_map: - missing_auth_events.add(e_id) - - for e_id in missing_auth_events: - m_ev = await self.federation_client.get_pdu( - [origin], - e_id, - room_version=room_version, - outlier=True, - timeout=10000, - ) - if m_ev and m_ev.event_id == e_id: - event_map[e_id] = m_ev - else: - logger.info("Failed to find auth event %r", e_id) - - for e in itertools.chain(auth_events, state, [event]): - auth_for_e = { - (event_map[e_id].type, event_map[e_id].state_key): event_map[e_id] - for e_id in e.auth_event_ids() - if e_id in event_map - } - if create_event: - auth_for_e[(EventTypes.Create, "")] = create_event - - try: - validate_event_for_room_version(room_version, e) - check_auth_rules_for_event(room_version, e, auth_for_e) - except SynapseError as err: - # we may get SynapseErrors here as well as AuthErrors. For - # instance, there are a couple of (ancient) events in some - # rooms whose senders do not have the correct sigil; these - # cause SynapseErrors in auth.check. We don't want to give up - # the attempt to federate altogether in such cases. - - logger.warning("Rejecting %s because %s", e.event_id, err.msg) - - if e == event: - raise - events_to_context[e.event_id].rejected = RejectedReason.AUTH_ERROR - - if auth_events or state: - await self._federation_event_handler.persist_events_and_notify( - room_id, - [ - (e, events_to_context[e.event_id]) - for e in itertools.chain(auth_events, state) - ], - ) - - new_event_context = await self.state_handler.compute_event_context( - event, old_state=state - ) - - return await self._federation_event_handler.persist_events_and_notify( - room_id, [(event, new_event_context)] - ) - async def on_get_missing_events( self, origin: str, diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f640b417b3..5a2f2e5ebb 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging from http import HTTPStatus from typing import ( @@ -45,7 +46,7 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) -from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion, RoomVersions from synapse.event_auth import ( auth_types_for_event, check_auth_rules_for_event, @@ -214,7 +215,7 @@ class FederationEventHandler: if missing_prevs: # We only backfill backwards to the min depth. - min_depth = await self.get_min_depth_for_context(pdu.room_id) + min_depth = await self._store.get_min_depth(pdu.room_id) logger.debug("min_depth: %d", min_depth) if min_depth is not None and pdu.depth > min_depth: @@ -390,9 +391,122 @@ class FederationEventHandler: prev_member_event, ) + async def process_remote_join( + self, + origin: str, + room_id: str, + auth_events: List[EventBase], + state: List[EventBase], + event: EventBase, + room_version: RoomVersion, + ) -> int: + """Persists the events returned by a send_join + + Checks the auth chain is valid (and passes auth checks) for the + state and event. Then persists the auth chain and state atomically. + Persists the event separately. Notifies about the persisted events + where appropriate. + + Will attempt to fetch missing auth events. + + Args: + origin: Where the events came from + room_id, + auth_events + state + event + room_version: The room version we expect this room to have, and + will raise if it doesn't match the version in the create event. + """ + events_to_context = {} + for e in itertools.chain(auth_events, state): + e.internal_metadata.outlier = True + events_to_context[e.event_id] = EventContext.for_outlier() + + event_map = { + e.event_id: e for e in itertools.chain(auth_events, state, [event]) + } + + create_event = None + for e in auth_events: + if (e.type, e.state_key) == (EventTypes.Create, ""): + create_event = e + break + + if create_event is None: + # If the state doesn't have a create event then the room is + # invalid, and it would fail auth checks anyway. + raise SynapseError(400, "No create event in state") + + room_version_id = create_event.content.get( + "room_version", RoomVersions.V1.identifier + ) + + if room_version.identifier != room_version_id: + raise SynapseError(400, "Room version mismatch") + + missing_auth_events = set() + for e in itertools.chain(auth_events, state, [event]): + for e_id in e.auth_event_ids(): + if e_id not in event_map: + missing_auth_events.add(e_id) + + for e_id in missing_auth_events: + m_ev = await self._federation_client.get_pdu( + [origin], + e_id, + room_version=room_version, + outlier=True, + timeout=10000, + ) + if m_ev and m_ev.event_id == e_id: + event_map[e_id] = m_ev + else: + logger.info("Failed to find auth event %r", e_id) + + for e in itertools.chain(auth_events, state, [event]): + auth_for_e = [ + event_map[e_id] for e_id in e.auth_event_ids() if e_id in event_map + ] + if create_event: + auth_for_e.append(create_event) + + try: + validate_event_for_room_version(room_version, e) + check_auth_rules_for_event(room_version, e, auth_for_e) + except SynapseError as err: + # we may get SynapseErrors here as well as AuthErrors. For + # instance, there are a couple of (ancient) events in some + # rooms whose senders do not have the correct sigil; these + # cause SynapseErrors in auth.check. We don't want to give up + # the attempt to federate altogether in such cases. + + logger.warning("Rejecting %s because %s", e.event_id, err.msg) + + if e == event: + raise + events_to_context[e.event_id].rejected = RejectedReason.AUTH_ERROR + + if auth_events or state: + await self.persist_events_and_notify( + room_id, + [ + (e, events_to_context[e.event_id]) + for e in itertools.chain(auth_events, state) + ], + ) + + new_event_context = await self._state_handler.compute_event_context( + event, old_state=state + ) + + return await self.persist_events_and_notify( + room_id, [(event, new_event_context)] + ) + @log_function async def backfill( - self, dest: str, room_id: str, limit: int, extremities: List[str] + self, dest: str, room_id: str, limit: int, extremities: Iterable[str] ) -> None: """Trigger a backfill request to `dest` for the given `room_id` @@ -1116,14 +1230,12 @@ class FederationEventHandler: await concurrently_execute(get_event, event_ids, 5) logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) - await self._auth_and_persist_fetched_events(destination, room_id, events) + await self._auth_and_persist_outliers(room_id, events) - async def _auth_and_persist_fetched_events( - self, origin: str, room_id: str, events: Iterable[EventBase] + async def _auth_and_persist_outliers( + self, room_id: str, events: Iterable[EventBase] ) -> None: - """Persist the events fetched by _get_events_and_persist or _get_remote_auth_chain_for_event - - The events to be persisted must be outliers. + """Persist a batch of outlier events fetched from remote servers. We first sort the events to make sure that we process each event's auth_events before the event itself, and then auth and persist them. @@ -1131,7 +1243,6 @@ class FederationEventHandler: Notifies about the events where appropriate. Params: - origin: where the events came from room_id: the room that the events are meant to be in (though this has not yet been checked) events: the events that have been fetched @@ -1167,15 +1278,15 @@ class FederationEventHandler: shortstr(e.event_id for e in roots), ) - await self._auth_and_persist_fetched_events_inner(origin, room_id, roots) + await self._auth_and_persist_outliers_inner(room_id, roots) for ev in roots: del event_map[ev.event_id] - async def _auth_and_persist_fetched_events_inner( - self, origin: str, room_id: str, fetched_events: Collection[EventBase] + async def _auth_and_persist_outliers_inner( + self, room_id: str, fetched_events: Collection[EventBase] ) -> None: - """Helper for _auth_and_persist_fetched_events + """Helper for _auth_and_persist_outliers Persists a batch of events where we have (theoretically) already persisted all of their auth events. @@ -1203,7 +1314,7 @@ class FederationEventHandler: def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: with nested_logging_context(suffix=event.event_id): - auth = {} + auth = [] for auth_event_id in event.auth_event_ids(): ae = persisted_events.get(auth_event_id) if not ae: @@ -1216,7 +1327,7 @@ class FederationEventHandler: # exist, which means it is premature to reject `event`. Instead we # just ignore it for now. return None - auth[(ae.type, ae.state_key)] = ae + auth.append(ae) context = EventContext.for_outlier() try: @@ -1256,6 +1367,10 @@ class FederationEventHandler: Returns: The updated context object. + + Raises: + AuthError if we were unable to find copies of the event's auth events. + (Most other failures just cause us to set `context.rejected`.) """ # This method should only be used for non-outliers assert not event.internal_metadata.outlier @@ -1272,7 +1387,26 @@ class FederationEventHandler: context.rejected = RejectedReason.AUTH_ERROR return context - # calculate what the auth events *should* be, to use as a basis for auth. + # next, check that we have all of the event's auth events. + # + # Note that this can raise AuthError, which we want to propagate to the + # caller rather than swallow with `context.rejected` (since we cannot be + # certain that there is a permanent problem with the event). + claimed_auth_events = await self._load_or_fetch_auth_events_for_event( + origin, event + ) + + # ... and check that the event passes auth at those auth events. + try: + check_auth_rules_for_event(room_version_obj, event, claimed_auth_events) + except AuthError as e: + logger.warning( + "While checking auth of %r against auth_events: %s", event, e + ) + context.rejected = RejectedReason.AUTH_ERROR + return context + + # now check auth against what we think the auth events *should* be. prev_state_ids = await context.get_prev_state_ids() auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=True @@ -1305,7 +1439,9 @@ class FederationEventHandler: auth_events_for_auth = calculated_auth_event_map try: - check_auth_rules_for_event(room_version_obj, event, auth_events_for_auth) + check_auth_rules_for_event( + room_version_obj, event, auth_events_for_auth.values() + ) except AuthError as e: logger.warning("Failed auth resolution for %r because %s", event, e) context.rejected = RejectedReason.AUTH_ERROR @@ -1403,11 +1539,9 @@ class FederationEventHandler: current_state_ids_list = [ e for k, e in current_state_ids.items() if k in auth_types ] - - auth_events_map = await self._store.get_events(current_state_ids_list) - current_auth_events = { - (e.type, e.state_key): e for e in auth_events_map.values() - } + current_auth_events = await self._store.get_events_as_list( + current_state_ids_list + ) try: check_auth_rules_for_event(room_version_obj, event, current_auth_events) @@ -1472,6 +1606,9 @@ class FederationEventHandler: # if we have missing events, we need to fetch those events from somewhere. # # we start by checking if they are in the store, and then try calling /event_auth/. + # + # TODO: this code is now redundant, since it should be impossible for us to + # get here without already having the auth events. if missing_auth: have_events = await self._store.have_seen_events( event.room_id, missing_auth @@ -1575,7 +1712,7 @@ class FederationEventHandler: logger.info( "After state res: updating auth_events with new state %s", { - (d.type, d.state_key): d.event_id + d for d in new_state.values() if auth_events.get((d.type, d.state_key)) != d }, @@ -1589,6 +1726,75 @@ class FederationEventHandler: return context, auth_events + async def _load_or_fetch_auth_events_for_event( + self, destination: str, event: EventBase + ) -> Collection[EventBase]: + """Fetch this event's auth_events, from database or remote + + Loads any of the auth_events that we already have from the database/cache. If + there are any that are missing, calls /event_auth to get the complete auth + chain for the event (and then attempts to load the auth_events again). + + If any of the auth_events cannot be found, raises an AuthError. This can happen + for a number of reasons; eg: the events don't exist, or we were unable to talk + to `destination`, or we couldn't validate the signature on the event (which + in turn has multiple potential causes). + + Args: + destination: where to send the /event_auth request. Typically the server + that sent us `event` in the first place. + event: the event whose auth_events we want + + Returns: + all of the events in `event.auth_events`, after deduplication + + Raises: + AuthError if we were unable to fetch the auth_events for any reason. + """ + event_auth_event_ids = set(event.auth_event_ids()) + event_auth_events = await self._store.get_events( + event_auth_event_ids, allow_rejected=True + ) + missing_auth_event_ids = event_auth_event_ids.difference( + event_auth_events.keys() + ) + if not missing_auth_event_ids: + return event_auth_events.values() + + logger.info( + "Event %s refers to unknown auth events %s: fetching auth chain", + event, + missing_auth_event_ids, + ) + try: + await self._get_remote_auth_chain_for_event( + destination, event.room_id, event.event_id + ) + except Exception as e: + logger.warning("Failed to get auth chain for %s: %s", event, e) + # in this case, it's very likely we still won't have all the auth + # events - but we pick that up below. + + # try to fetch the auth events we missed list time. + extra_auth_events = await self._store.get_events( + missing_auth_event_ids, allow_rejected=True + ) + missing_auth_event_ids.difference_update(extra_auth_events.keys()) + event_auth_events.update(extra_auth_events) + if not missing_auth_event_ids: + return event_auth_events.values() + + # we still don't have all the auth events. + logger.warning( + "Missing auth events for %s: %s", + event, + shortstr(missing_auth_event_ids), + ) + # the fact we can't find the auth event doesn't mean it doesn't + # exist, which means it is premature to store `event` as rejected. + # instead we raise an AuthError, which will make the caller ignore it. + raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found") + async def _get_remote_auth_chain_for_event( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1624,9 +1830,7 @@ class FederationEventHandler: for s in seen_remotes: remote_event_map.pop(s, None) - await self._auth_and_persist_fetched_events( - destination, room_id, remote_event_map.values() - ) + await self._auth_and_persist_outliers(room_id, remote_event_map.values()) async def _update_context_for_auth_events( self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] @@ -1696,16 +1900,27 @@ class FederationEventHandler: # persist_events_and_notify directly.) assert not event.internal_metadata.outlier - try: - if ( - not backfilled - and not context.rejected - and (await self._store.get_min_depth(event.room_id)) <= event.depth - ): + if not backfilled and not context.rejected: + min_depth = await self._store.get_min_depth(event.room_id) + if min_depth is None or min_depth > event.depth: + # XXX richvdh 2021/10/07: I don't really understand what this + # condition is doing. I think it's trying not to send pushes + # for events that predate our join - but that's not really what + # min_depth means, and anyway ancient events are a more general + # problem. + # + # for now I'm just going to log about it. + logger.info( + "Skipping push actions for old event with depth %s < %s", + event.depth, + min_depth, + ) + else: await self._action_generator.handle_push_actions_for_event( event, context ) + try: await self.persist_events_and_notify( event.room_id, [(event, context)], backfilled=backfilled ) @@ -1837,6 +2052,3 @@ class FederationEventHandler: len(ev.auth_event_ids()), ) raise SynapseError(HTTPStatus.BAD_REQUEST, "Too many auth_events") - - async def get_min_depth_for_context(self, context: str) -> int: - return await self._store.get_min_depth(context) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4de9f4b828..2e024b551f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -607,29 +607,6 @@ class EventCreationHandler: builder.internal_metadata.historical = historical - # Strip down the auth_event_ids to only what we need to auth the event. - # For example, we don't need extra m.room.member that don't match event.sender - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. - assert prev_event_ids is not None - - temp_event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) - auth_events = await self.store.get_events_as_list(auth_event_ids) - # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events - auth_event_ids = self._event_auth_handler.compute_auth_events( - event=temp_event, - current_state_ids=auth_event_state_map, - for_verification=False, - ) - event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -936,6 +913,33 @@ class EventCreationHandler: Tuple of created event, context """ + # Strip down the auth_event_ids to only what we need to auth the event. + # For example, we don't need extra m.room.member that don't match event.sender + full_state_ids_at_event = None + if auth_event_ids is not None: + # If auth events are provided, prev events must be also. + assert prev_event_ids is not None + + # Copy the full auth state before it stripped down + full_state_ids_at_event = auth_event_ids.copy() + + temp_event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) + auth_events = await self.store.get_events_as_list(auth_event_ids) + # Create a StateMap[str] + auth_event_state_map = { + (e.type, e.state_key): e.event_id for e in auth_events + } + # Actually strip down and use the necessary auth events + auth_event_ids = self._event_auth_handler.compute_auth_events( + event=temp_event, + current_state_ids=auth_event_state_map, + for_verification=False, + ) + if prev_event_ids is not None: assert ( len(prev_event_ids) <= 10 @@ -965,6 +969,13 @@ class EventCreationHandler: if builder.internal_metadata.outlier: event.internal_metadata.outlier = True context = EventContext.for_outlier() + elif ( + event.type == EventTypes.MSC2716_INSERTION + and full_state_ids_at_event + and builder.internal_metadata.is_historical() + ): + old_state = await self.store.get_events_as_list(full_state_ids_at_event) + context = await self.state.compute_event_context(event, old_state=old_state) else: context = await self.state.compute_event_context(event) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 176e4dfdd4..60ff896386 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -86,19 +86,22 @@ class PaginationHandler: self._event_serializer = hs.get_event_client_serializer() self._retention_default_max_lifetime = ( - hs.config.server.retention_default_max_lifetime + hs.config.retention.retention_default_max_lifetime ) self._retention_allowed_lifetime_min = ( - hs.config.server.retention_allowed_lifetime_min + hs.config.retention.retention_allowed_lifetime_min ) self._retention_allowed_lifetime_max = ( - hs.config.server.retention_allowed_lifetime_max + hs.config.retention.retention_allowed_lifetime_max ) - if hs.config.worker.run_background_tasks and hs.config.server.retention_enabled: + if ( + hs.config.worker.run_background_tasks + and hs.config.retention.retention_enabled + ): # Run the purge jobs described in the configuration file. - for job in hs.config.server.retention_purge_jobs: + for job in hs.config.retention.retention_purge_jobs: logger.info("Setting up purge job with config: %s", job) self.clock.looping_call( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 404afb9402..b5968e047b 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -1489,7 +1489,7 @@ def format_user_presence_state( The "user_id" is optional so that this function can be used to format presence updates for client /sync responses and for federation /send requests. """ - content = {"presence": state.state} + content: JsonDict = {"presence": state.state} if include_user_id: content["user_id"] = state.user_id if state.last_active_ts: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7072bca1fc..6f39e9446f 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -465,17 +465,35 @@ class RoomCreationHandler: # the room has been created # Calculate the minimum power level needed to clone the room event_power_levels = power_levels.get("events", {}) + if not isinstance(event_power_levels, dict): + event_power_levels = {} state_default = power_levels.get("state_default", 50) + try: + state_default_int = int(state_default) # type: ignore[arg-type] + except (TypeError, ValueError): + state_default_int = 50 ban = power_levels.get("ban", 50) - needed_power_level = max(state_default, ban, max(event_power_levels.values())) + try: + ban = int(ban) # type: ignore[arg-type] + except (TypeError, ValueError): + ban = 50 + needed_power_level = max( + state_default_int, ban, max(event_power_levels.values()) + ) # Get the user's current power level, this matches the logic in get_user_power_level, # but without the entire state map. user_power_levels = power_levels.setdefault("users", {}) + if not isinstance(user_power_levels, dict): + user_power_levels = {} users_default = power_levels.get("users_default", 0) current_power_level = user_power_levels.get(user_id, users_default) + try: + current_power_level_int = int(current_power_level) # type: ignore[arg-type] + except (TypeError, ValueError): + current_power_level_int = 0 # Raise the requester's power level in the new room if necessary - if current_power_level < needed_power_level: + if current_power_level_int < needed_power_level: user_power_levels[user_id] = needed_power_level await self._send_events_for_new_room( diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 51dd4e7555..2f5a3e4d19 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -13,6 +13,10 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +def generate_fake_event_id() -> str: + return "$fake_" + random_string(43) + + class RoomBatchHandler: def __init__(self, hs: "HomeServer"): self.hs = hs @@ -177,6 +181,11 @@ class RoomBatchHandler: state_event_ids_at_start = [] auth_event_ids = initial_auth_event_ids.copy() + + # Make the state events float off on their own so we don't have a + # bunch of `@mxid joined the room` noise between each batch + prev_event_id_for_state_chain = generate_fake_event_id() + for state_event in state_events_at_start: assert_params_in_dict( state_event, ["type", "origin_server_ts", "content", "sender"] @@ -200,10 +209,6 @@ class RoomBatchHandler: # Mark all events as historical event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True - # Make the state events float off on their own so we don't have a - # bunch of `@mxid joined the room` noise between each batch - fake_prev_event_id = "$" + random_string(43) - # TODO: This is pretty much the same as some other code to handle inserting state in this file if event_dict["type"] == EventTypes.Member: membership = event_dict["content"].get("membership", None) @@ -216,7 +221,7 @@ class RoomBatchHandler: action=membership, content=event_dict["content"], outlier=True, - prev_event_ids=[fake_prev_event_id], + prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -235,7 +240,7 @@ class RoomBatchHandler: ), event_dict, outlier=True, - prev_event_ids=[fake_prev_event_id], + prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -245,6 +250,8 @@ class RoomBatchHandler: state_event_ids_at_start.append(event_id) auth_event_ids.append(event_id) + # Connect all the state in a floating chain + prev_event_id_for_state_chain = event_id return state_event_ids_at_start @@ -289,6 +296,10 @@ class RoomBatchHandler: for ev in events_to_create: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) + assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % ( + ev["sender"], + ) + event_dict = { "type": ev["type"], "origin_server_ts": ev["origin_server_ts"], @@ -311,6 +322,19 @@ class RoomBatchHandler: historical=True, depth=inherited_depth, ) + + assert context._state_group + + # Normally this is done when persisting the event but we have to + # pre-emptively do it here because we create all the events first, + # then persist them in another pass below. And we want to share + # state_groups across the whole batch so this lookup needs to work + # for the next event in the batch in this loop. + await self.store.store_state_group_id_for_event_id( + event_id=event.event_id, + state_group_id=context._state_group, + ) + logger.debug( "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", event, @@ -318,10 +342,6 @@ class RoomBatchHandler: auth_event_ids, ) - assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( - event.sender, - ) - events_to_persist.append((event, context)) event_id = event.event_id diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 8810f048ba..991fee7e58 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -196,63 +196,12 @@ class UserDirectoryHandler(StateDeltasHandler): room_id, prev_event_id, event_id, typ ) elif typ == EventTypes.Member: - change = await self._get_key_change( + await self._handle_room_membership_event( + room_id, prev_event_id, event_id, - key_name="membership", - public_value=Membership.JOIN, + state_key, ) - - is_remote = not self.is_mine_id(state_key) - if change is MatchChange.now_false: - # Need to check if the server left the room entirely, if so - # we might need to remove all the users in that room - is_in_room = await self.store.is_host_joined( - room_id, self.server_name - ) - if not is_in_room: - logger.debug("Server left room: %r", room_id) - # Fetch all the users that we marked as being in user - # directory due to being in the room and then check if - # need to remove those users or not - user_ids = await self.store.get_users_in_dir_due_to_room( - room_id - ) - - for user_id in user_ids: - await self._handle_remove_user(room_id, user_id) - continue - else: - logger.debug("Server is still in room: %r", room_id) - - include_in_dir = ( - is_remote - or await self.store.should_include_local_user_in_dir(state_key) - ) - if include_in_dir: - if change is MatchChange.no_change: - # Handle any profile changes for remote users. - # (For local users we are not forced to scan membership - # events; instead the rest of the application calls - # `handle_local_profile_change`.) - if is_remote: - await self._handle_profile_change( - state_key, room_id, prev_event_id, event_id - ) - continue - - if change is MatchChange.now_true: # The user joined - # This may be the first time we've seen a remote user. If - # so, ensure we have a directory entry for them. (We don't - # need to do this for local users: their directory entry - # is created at the point of registration. - if is_remote: - await self._upsert_directory_entry_for_remote_user( - state_key, event_id - ) - await self._track_user_joined_room(room_id, state_key) - else: # The user left - await self._handle_remove_user(room_id, state_key) else: logger.debug("Ignoring irrelevant type: %r", typ) @@ -317,14 +266,83 @@ class UserDirectoryHandler(StateDeltasHandler): for user_id in users_in_room: await self.store.remove_user_who_share_room(user_id, room_id) - # Then, re-add them to the tables. + # Then, re-add all remote users and some local users to the tables. # NOTE: this is not the most efficient method, as _track_user_joined_room sets # up local_user -> other_user and other_user_whos_local -> local_user, # which when ran over an entire room, will result in the same values # being added multiple times. The batching upserts shouldn't make this # too bad, though. for user_id in users_in_room: - await self._track_user_joined_room(room_id, user_id) + if not self.is_mine_id( + user_id + ) or await self.store.should_include_local_user_in_dir(user_id): + await self._track_user_joined_room(room_id, user_id) + + async def _handle_room_membership_event( + self, + room_id: str, + prev_event_id: str, + event_id: str, + state_key: str, + ) -> None: + """Process a single room membershp event. + + We have to do two things: + + 1. Update the room-sharing tables. + This applies to remote users and non-excluded local users. + 2. Update the user_directory and user_directory_search tables. + This applies to remote users only, because we only become aware of + the (and any profile changes) by listening to these events. + The rest of the application knows exactly when local users are + created or their profile changed---it will directly call methods + on this class. + """ + joined = await self._get_key_change( + prev_event_id, + event_id, + key_name="membership", + public_value=Membership.JOIN, + ) + + # Both cases ignore excluded local users, so start by discarding them. + is_remote = not self.is_mine_id(state_key) + if not is_remote and not await self.store.should_include_local_user_in_dir( + state_key + ): + return + + if joined is MatchChange.now_false: + # Need to check if the server left the room entirely, if so + # we might need to remove all the users in that room + is_in_room = await self.store.is_host_joined(room_id, self.server_name) + if not is_in_room: + logger.debug("Server left room: %r", room_id) + # Fetch all the users that we marked as being in user + # directory due to being in the room and then check if + # need to remove those users or not + user_ids = await self.store.get_users_in_dir_due_to_room(room_id) + + for user_id in user_ids: + await self._handle_remove_user(room_id, user_id) + else: + logger.debug("Server is still in room: %r", room_id) + await self._handle_remove_user(room_id, state_key) + elif joined is MatchChange.no_change: + # Handle any profile changes for remote users. + # (For local users the rest of the application calls + # `handle_local_profile_change`.) + if is_remote: + await self._handle_possible_remote_profile_change( + state_key, room_id, prev_event_id, event_id + ) + elif joined is MatchChange.now_true: # The user joined + # This may be the first time we've seen a remote user. If + # so, ensure we have a directory entry for them. (For local users, + # the rest of the application calls `handle_local_profile_change`.) + if is_remote: + await self._upsert_directory_entry_for_remote_user(state_key, event_id) + await self._track_user_joined_room(room_id, state_key) async def _upsert_directory_entry_for_remote_user( self, user_id: str, event_id: str @@ -349,8 +367,8 @@ class UserDirectoryHandler(StateDeltasHandler): """Someone's just joined a room. Update `users_in_public_rooms` or `users_who_share_private_rooms` as appropriate. - The caller is responsible for ensuring that the given user is not excluded - from the user directory. + The caller is responsible for ensuring that the given user should be + included in the user directory. """ is_public = await self.store.is_room_world_readable_or_publicly_joinable( room_id @@ -386,24 +404,32 @@ class UserDirectoryHandler(StateDeltasHandler): await self.store.add_users_who_share_private_room(room_id, to_insert) async def _handle_remove_user(self, room_id: str, user_id: str) -> None: - """Called when we might need to remove user from directory + """Called when when someone leaves a room. The user may be local or remote. + + (If the person who left was the last local user in this room, the server + is no longer in the room. We call this function to forget that the remaining + remote users are in the room, even though they haven't left. So the name is + a little misleading!) Args: room_id: The room ID that user left or stopped being public that user_id """ - logger.debug("Removing user %r", user_id) + logger.debug("Removing user %r from room %r", user_id, room_id) # Remove user from sharing tables await self.store.remove_user_who_share_room(user_id, room_id) - # Are they still in any rooms? If not, remove them entirely. - rooms_user_is_in = await self.store.get_user_dir_rooms_user_is_in(user_id) + # Additionally, if they're a remote user and we're no longer joined + # to any rooms they're in, remove them from the user directory. + if not self.is_mine_id(user_id): + rooms_user_is_in = await self.store.get_user_dir_rooms_user_is_in(user_id) - if len(rooms_user_is_in) == 0: - await self.store.remove_from_user_dir(user_id) + if len(rooms_user_is_in) == 0: + logger.debug("Removing user %r from directory", user_id) + await self.store.remove_from_user_dir(user_id) - async def _handle_profile_change( + async def _handle_possible_remote_profile_change( self, user_id: str, room_id: str, @@ -411,7 +437,8 @@ class UserDirectoryHandler(StateDeltasHandler): event_id: Optional[str], ) -> None: """Check member event changes for any profile changes and update the - database if there are. + database if there are. This is intended for remote users only. The caller + is responsible for checking that the given user is remote. """ if not prev_event_id or not event_id: return diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 8ae21bc43c..ab7ef8f950 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -45,6 +45,7 @@ from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.client.login import LoginResponse from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter @@ -83,6 +84,8 @@ __all__ = [ "DirectServeJsonResource", "ModuleApi", "PRESENCE_ALL_USERS", + "LoginResponse", + "JsonDict", ] logger = logging.getLogger(__name__) @@ -139,6 +142,7 @@ class ModuleApi: self._spam_checker = hs.get_spam_checker() self._account_validity_handler = hs.get_account_validity_handler() self._third_party_event_rules = hs.get_third_party_event_rules() + self._password_auth_provider = hs.get_password_auth_provider() self._presence_router = hs.get_presence_router() ################################################################################# @@ -164,6 +168,11 @@ class ModuleApi: """Registers callbacks for presence router capabilities.""" return self._presence_router.register_presence_router_callbacks + @property + def register_password_auth_provider_callbacks(self): + """Registers callbacks for password auth provider capabilities.""" + return self._password_auth_provider.register_password_auth_provider_callbacks + def register_web_resource(self, path: str, resource: IResource): """Registers a web resource to be served at the given path. @@ -773,9 +782,9 @@ class ModuleApi: # Sanitize some of the data. We don't want to return tokens. return [ UserIpAndAgent( - ip=str(data["ip"]), - user_agent=str(data["user_agent"]), - last_seen=int(data["last_seen"]), + ip=data["ip"], + user_agent=data["user_agent"], + last_seen=data["last_seen"], ) for data in raw_data ] diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index 98ea911a81..1db900e41f 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -14,9 +14,16 @@ """Exception types which are exposed as part of the stable module API""" -from synapse.api.errors import ( # noqa: F401 +from synapse.api.errors import ( InvalidClientCredentialsError, RedirectException, SynapseError, ) -from synapse.config._base import ConfigError # noqa: F401 +from synapse.config._base import ConfigError + +__all__ = [ + "InvalidClientCredentialsError", + "RedirectException", + "SynapseError", + "ConfigError", +] diff --git a/synapse/py.typed b/synapse/py.typed new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/synapse/py.typed diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 8c80153ab6..7bae36db16 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -182,9 +182,13 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. - self._logging_context = BackgroundProcessLoggingContext( - "replication-conn", self.conn_id - ) + with PreserveLoggingContext(): + # thanks to `PreserveLoggingContext()`, the new logcontext is guaranteed to + # capture the sentinel context as its containing context and won't prevent + # GC of / unintentionally reactivate what would be the current context. + self._logging_context = BackgroundProcessLoggingContext( + "replication-conn", self.conn_id + ) def connectionMade(self): logger.info("[%s] Connection established", self.id()) @@ -434,8 +438,12 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): if self.transport: self.transport.unregisterProducer() - # mark the logging context as finished - self._logging_context.__exit__(None, None, None) + # mark the logging context as finished by triggering `__exit__()` + with PreserveLoggingContext(): + with self._logging_context: + pass + # the sentinel context is now active, which may not be correct. + # PreserveLoggingContext() will restore the correct logging context. def __str__(self): addr = None diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py index 062fe2f33e..8d28bd3f3f 100644 --- a/synapse/replication/tcp/redis.py +++ b/synapse/replication/tcp/redis.py @@ -100,9 +100,13 @@ class RedisSubscriber(txredisapi.SubscriberProtocol): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. - self._logging_context = BackgroundProcessLoggingContext( - "replication_command_handler" - ) + with PreserveLoggingContext(): + # thanks to `PreserveLoggingContext()`, the new logcontext is guaranteed to + # capture the sentinel context as its containing context and won't prevent + # GC of / unintentionally reactivate what would be the current context. + self._logging_context = BackgroundProcessLoggingContext( + "replication_command_handler" + ) def connectionMade(self): logger.info("Connected to redis") @@ -182,8 +186,12 @@ class RedisSubscriber(txredisapi.SubscriberProtocol): super().connectionLost(reason) self.synapse_handler.lost_connection(self) - # mark the logging context as finished - self._logging_context.__exit__(None, None, None) + # mark the logging context as finished by triggering `__exit__()` + with PreserveLoggingContext(): + with self._logging_context: + pass + # the sentinel context is now active, which may not be correct. + # PreserveLoggingContext() will restore the correct logging context. def send_command(self, cmd: Command): """Send a command if connection has been established. diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index 0b0711c03c..d695c18be2 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -232,12 +232,12 @@ class RelationPaginationServlet(RestServlet): # Similarly, we don't allow relations to be applied to relations, so we # return the original relations without any aggregations on top of them # here. - events = await self._event_serializer.serialize_events( + serialized_events = await self._event_serializer.serialize_events( events, now, bundle_aggregations=False ) return_value = pagination_chunk.to_dict() - return_value["chunk"] = events + return_value["chunk"] = serialized_events return_value["original_event"] = original_event return 200, return_value @@ -416,10 +416,10 @@ class RelationAggregationGroupPaginationServlet(RestServlet): ) now = self.clock.time_msec() - events = await self._event_serializer.serialize_events(events, now) + serialized_events = await self._event_serializer.serialize_events(events, now) return_value = result.to_dict() - return_value["chunk"] = events + return_value["chunk"] = serialized_events return 200, return_value diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 38ad4c2447..99f8156ad0 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -32,7 +32,6 @@ from synapse.http.servlet import ( from synapse.http.site import SynapseRequest from synapse.rest.client.transactions import HttpTransactionCache from synapse.types import JsonDict -from synapse.util.stringutils import random_string if TYPE_CHECKING: from synapse.server import HomeServer @@ -160,11 +159,6 @@ class RoomBatchSendEventRestServlet(RestServlet): base_insertion_event = None if batch_id_from_query: batch_id_to_connect_to = batch_id_from_query - # All but the first base insertion event should point at a fake - # event, which causes the HS to ask for the state at the start of - # the batch later. - fake_prev_event_id = "$" + random_string(43) - prev_event_ids = [fake_prev_event_id] # Otherwise, create an insertion event to act as a starting point. # # We don't always have an insertion event to start hanging more history @@ -173,8 +167,6 @@ class RoomBatchSendEventRestServlet(RestServlet): # an insertion event), in which case we just create a new insertion event # that can then get pointed to by a "marker" event later. else: - prev_event_ids = prev_event_ids_from_query - base_insertion_event_dict = ( self.room_batch_handler.create_insertion_event_dict( sender=requester.user.to_string(), @@ -182,7 +174,7 @@ class RoomBatchSendEventRestServlet(RestServlet): origin_server_ts=last_event_in_batch["origin_server_ts"], ) ) - base_insertion_event_dict["prev_events"] = prev_event_ids.copy() + base_insertion_event_dict["prev_events"] = prev_event_ids_from_query.copy() ( base_insertion_event, @@ -203,6 +195,11 @@ class RoomBatchSendEventRestServlet(RestServlet): EventContentFields.MSC2716_NEXT_BATCH_ID ] + # Also connect the historical event chain to the end of the floating + # state chain, which causes the HS to ask for the state at the start of + # the batch later. + prev_event_ids = [state_event_ids_at_start[-1]] + # Create and persist all of the historical events as well as insertion # and batch meta events to make the batch navigable in the DAG. event_ids, next_batch_id = await self.room_batch_handler.handle_batch_of_events( diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 08bd85f664..bec77088ee 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -16,12 +16,15 @@ import functools import os import re -from typing import Any, Callable, List +from typing import Any, Callable, List, TypeVar, cast NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") -def _wrap_in_base_path(func: Callable[..., str]) -> Callable[..., str]: +F = TypeVar("F", bound=Callable[..., str]) + + +def _wrap_in_base_path(func: F) -> F: """Takes a function that returns a relative path and turns it into an absolute path based on the location of the primary media store """ @@ -31,7 +34,7 @@ def _wrap_in_base_path(func: Callable[..., str]) -> Callable[..., str]: path = func(self, *args, **kwargs) return os.path.join(self.base_path, path) - return _wrapped + return cast(F, _wrapped) class MediaFilePaths: @@ -45,23 +48,6 @@ class MediaFilePaths: def __init__(self, primary_base_path: str): self.base_path = primary_base_path - def default_thumbnail_rel( - self, - default_top_level: str, - default_sub_type: str, - width: int, - height: int, - content_type: str, - method: str, - ) -> str: - top_level_type, sub_type = content_type.split("/") - file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) - return os.path.join( - "default_thumbnails", default_top_level, default_sub_type, file_name - ) - - default_thumbnail = _wrap_in_base_path(default_thumbnail_rel) - def local_media_filepath_rel(self, media_id: str) -> str: return os.path.join("local_content", media_id[0:2], media_id[2:4], media_id[4:]) diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 78b1603f19..2a59552c20 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, List, Optional import attr -from synapse.http.client import SimpleHttpClient from synapse.types import JsonDict from synapse.util import json_decoder @@ -48,7 +47,7 @@ class OEmbedProvider: requesting/parsing oEmbed content. """ - def __init__(self, hs: "HomeServer", client: SimpleHttpClient): + def __init__(self, hs: "HomeServer"): self._oembed_patterns = {} for oembed_endpoint in hs.config.oembed.oembed_patterns: api_endpoint = oembed_endpoint.api_endpoint @@ -69,7 +68,6 @@ class OEmbedProvider: # Iterate through each URL pattern and point it to the endpoint. for pattern in oembed_endpoint.url_patterns: self._oembed_patterns[pattern] = api_endpoint - self._client = client def get_oembed_url(self, url: str) -> Optional[str]: """ @@ -139,10 +137,11 @@ class OEmbedProvider: # oEmbed responses *must* be UTF-8 according to the spec. oembed = json_decoder.decode(raw_body.decode("utf-8")) - # Ensure there's a version of 1.0. - oembed_version = oembed["version"] - if oembed_version != "1.0": - raise RuntimeError(f"Invalid version: {oembed_version}") + # The version is a required string field, but not always provided, + # or sometimes provided as a float. Be lenient. + oembed_version = oembed.get("version", "1.0") + if oembed_version != "1.0" and oembed_version != 1: + raise RuntimeError(f"Invalid oEmbed version: {oembed_version}") # Ensure the cache age is None or an int. cache_age = oembed.get("cache_age") diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 1fe0fc8aa9..278fd901e2 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import codecs import datetime import errno import fnmatch @@ -22,7 +23,7 @@ import re import shutil import sys import traceback -from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Tuple, Union from urllib import parse as urlparse import attr @@ -140,7 +141,7 @@ class PreviewUrlResource(DirectServeJsonResource): self.primary_base_path = media_repo.primary_base_path self.media_storage = media_storage - self._oembed = OEmbedProvider(hs, self.client) + self._oembed = OEmbedProvider(hs) # We run the background jobs if we're the instance specified (or no # instance is specified, where we assume there is only one instance @@ -295,8 +296,7 @@ class PreviewUrlResource(DirectServeJsonResource): with open(media_info.filename, "rb") as file: body = file.read() - encoding = get_html_media_encoding(body, media_info.media_type) - tree = decode_body(body, encoding) + tree = decode_body(body, media_info.uri, media_info.media_type) if tree is not None: # Check if this HTML document points to oEmbed information and # defer to that. @@ -632,16 +632,27 @@ class PreviewUrlResource(DirectServeJsonResource): logger.debug("No media removed from url cache") -def get_html_media_encoding(body: bytes, content_type: str) -> str: +def _normalise_encoding(encoding: str) -> Optional[str]: + """Use the Python codec's name as the normalised entry.""" + try: + return codecs.lookup(encoding).name + except LookupError: + return None + + +def get_html_media_encodings(body: bytes, content_type: Optional[str]) -> Iterable[str]: """ - Get the encoding of the body based on the (presumably) HTML body or media_type. + Get potential encoding of the body based on the (presumably) HTML body or the content-type header. The precedence used for finding a character encoding is: - 1. meta tag with a charset declared. + 1. <meta> tag with a charset declared. 2. The XML document's character encoding attribute. 3. The Content-Type header. - 4. Fallback to UTF-8. + 4. Fallback to utf-8. + 5. Fallback to windows-1252. + + This roughly follows the algorithm used by BeautifulSoup's bs4.dammit.EncodingDetector. Args: body: The HTML document, as bytes. @@ -650,39 +661,55 @@ def get_html_media_encoding(body: bytes, content_type: str) -> str: Returns: The character encoding of the body, as a string. """ + # There's no point in returning an encoding more than once. + attempted_encodings: Set[str] = set() + # Limit searches to the first 1kb, since it ought to be at the top. body_start = body[:1024] - # Let's try and figure out if it has an encoding set in a meta tag. + # Check if it has an encoding set in a meta tag. match = _charset_match.search(body_start) if match: - return match.group(1).decode("ascii") + encoding = _normalise_encoding(match.group(1).decode("ascii")) + if encoding: + attempted_encodings.add(encoding) + yield encoding # TODO Support <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> - # If we didn't find a match, see if it an XML document with an encoding. + # Check if it has an XML document with an encoding. match = _xml_encoding_match.match(body_start) if match: - return match.group(1).decode("ascii") - - # If we don't find a match, we'll look at the HTTP Content-Type, and - # if that doesn't exist, we'll fall back to UTF-8. - content_match = _content_type_match.match(content_type) - if content_match: - return content_match.group(1) - - return "utf-8" + encoding = _normalise_encoding(match.group(1).decode("ascii")) + if encoding and encoding not in attempted_encodings: + attempted_encodings.add(encoding) + yield encoding + + # Check the HTTP Content-Type header for a character set. + if content_type: + content_match = _content_type_match.match(content_type) + if content_match: + encoding = _normalise_encoding(content_match.group(1)) + if encoding and encoding not in attempted_encodings: + attempted_encodings.add(encoding) + yield encoding + + # Finally, fallback to UTF-8, then windows-1252. + for fallback in ("utf-8", "cp1252"): + if fallback not in attempted_encodings: + yield fallback def decode_body( - body: bytes, request_encoding: Optional[str] = None + body: bytes, uri: str, content_type: Optional[str] = None ) -> Optional["etree.Element"]: """ This uses lxml to parse the HTML document. Args: body: The HTML document, as bytes. - request_encoding: The character encoding of the body, as a string. + uri: The URI used to download the body. + content_type: The Content-Type header. Returns: The parsed HTML body, or None if an error occurred during processed. @@ -691,32 +718,25 @@ def decode_body( if not body: return None - from lxml import etree - - # Create an HTML parser. If this fails, log and return no metadata. - try: - parser = etree.HTMLParser(recover=True, encoding=request_encoding) - except LookupError: - # blindly consider the encoding as utf-8. - parser = etree.HTMLParser(recover=True, encoding="utf-8") - except Exception as e: - logger.warning("Unable to create HTML parser: %s" % (e,)) + for encoding in get_html_media_encodings(body, content_type): + try: + body_str = body.decode(encoding) + except Exception: + pass + else: + break + else: + logger.warning("Unable to decode HTML body for %s", uri) return None - def _attempt_decode_body( - body_attempt: Union[bytes, str] - ) -> Optional["etree.Element"]: - # Attempt to parse the body. Returns None if the body was successfully - # parsed, but no tree was found. - return etree.fromstring(body_attempt, parser) + from lxml import etree - # Attempt to parse the body. If this fails, log and return no metadata. - try: - return _attempt_decode_body(body) - except UnicodeDecodeError: - # blindly try decoding the body as utf-8, which seems to fix - # the charset mismatches on https://google.com - return _attempt_decode_body(body.decode("utf-8", "ignore")) + # Create an HTML parser. + parser = etree.HTMLParser(recover=True, encoding="utf-8") + + # Attempt to parse the body. Returns None if the body was successfully + # parsed, but no tree was found. + return etree.fromstring(body_str, parser) def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: diff --git a/synapse/server.py b/synapse/server.py index 5bc045d615..a64c846d1c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -65,7 +65,7 @@ from synapse.handlers.account_data import AccountDataHandler from synapse.handlers.account_validity import AccountValidityHandler from synapse.handlers.admin import AdminHandler from synapse.handlers.appservice import ApplicationServicesHandler -from synapse.handlers.auth import AuthHandler, MacaroonGenerator +from synapse.handlers.auth import AuthHandler, MacaroonGenerator, PasswordAuthProvider from synapse.handlers.cas import CasHandler from synapse.handlers.deactivate_account import DeactivateAccountHandler from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler @@ -688,6 +688,10 @@ class HomeServer(metaclass=abc.ABCMeta): return ThirdPartyEventRules(self) @cache_in_self + def get_password_auth_provider(self) -> PasswordAuthProvider: + return PasswordAuthProvider() + + @cache_in_self def get_room_member_handler(self) -> RoomMemberHandler: if self.config.worker.worker_app: return RoomMemberWorkerHandler(self) diff --git a/synapse/state/v1.py b/synapse/state/v1.py index ffe6207a3c..6edadea550 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py @@ -332,7 +332,7 @@ def _resolve_auth_events( event_auth.check_auth_rules_for_event( RoomVersions.V1, event, - auth_events, + auth_events.values(), ) prev_event = event except AuthError: @@ -350,7 +350,7 @@ def _resolve_normal_events( event_auth.check_auth_rules_for_event( RoomVersions.V1, event, - auth_events, + auth_events.values(), ) return event except AuthError: diff --git a/synapse/state/v2.py b/synapse/state/v2.py index bd18eefd58..c618df2fde 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -549,7 +549,7 @@ async def _iterative_auth_checks( event_auth.check_auth_rules_for_event( room_version, event, - auth_events, + auth_events.values(), ) resolved_state[(event.type, event.state_key)] = event_id diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 6c1ef09049..b81d9218ce 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -13,14 +13,26 @@ # limitations under the License. import logging -from typing import Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union, cast + +from typing_extensions import TypedDict from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool, make_tuple_comparison_clause -from synapse.types import UserID +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, + make_tuple_comparison_clause, +) +from synapse.storage.databases.main.monthly_active_users import MonthlyActiveUsersStore +from synapse.storage.types import Connection +from synapse.types import JsonDict, UserID from synapse.util.caches.lrucache import LruCache +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) # Number of msec of granularity to store the user IP 'last seen' time. Smaller @@ -29,8 +41,31 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 +class DeviceLastConnectionInfo(TypedDict): + """Metadata for the last connection seen for a user and device combination""" + + # These types must match the columns in the `devices` table + user_id: str + device_id: str + + ip: Optional[str] + user_agent: Optional[str] + last_seen: Optional[int] + + +class LastConnectionInfo(TypedDict): + """Metadata for the last connection seen for an access token and IP combination""" + + # These types must match the columns in the `user_ips` table + access_token: str + ip: str + + user_agent: str + last_seen: int + + class ClientIpBackgroundUpdateStore(SQLBaseStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.db_pool.updates.register_background_index_update( @@ -81,8 +116,10 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): "devices_last_seen", self._devices_last_seen_update ) - async def _remove_user_ip_nonunique(self, progress, batch_size): - def f(conn): + async def _remove_user_ip_nonunique( + self, progress: JsonDict, batch_size: int + ) -> int: + def f(conn: LoggingDatabaseConnection) -> None: txn = conn.cursor() txn.execute("DROP INDEX IF EXISTS user_ips_user_ip") txn.close() @@ -93,14 +130,14 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): ) return 1 - async def _analyze_user_ip(self, progress, batch_size): + async def _analyze_user_ip(self, progress: JsonDict, batch_size: int) -> int: # Background update to analyze user_ips table before we run the # deduplication background update. The table may not have been analyzed # for ages due to the table locks. # # This will lock out the naive upserts to user_ips while it happens, but # the analyze should be quick (28GB table takes ~10s) - def user_ips_analyze(txn): + def user_ips_analyze(txn: LoggingTransaction) -> None: txn.execute("ANALYZE user_ips") await self.db_pool.runInteraction("user_ips_analyze", user_ips_analyze) @@ -109,16 +146,16 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): return 1 - async def _remove_user_ip_dupes(self, progress, batch_size): + async def _remove_user_ip_dupes(self, progress: JsonDict, batch_size: int) -> int: # This works function works by scanning the user_ips table in batches # based on `last_seen`. For each row in a batch it searches the rest of # the table to see if there are any duplicates, if there are then they # are removed and replaced with a suitable row. # Fetch the start of the batch - begin_last_seen = progress.get("last_seen", 0) + begin_last_seen: int = progress.get("last_seen", 0) - def get_last_seen(txn): + def get_last_seen(txn: LoggingTransaction) -> Optional[int]: txn.execute( """ SELECT last_seen FROM user_ips @@ -129,7 +166,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): """, (begin_last_seen, batch_size), ) - row = txn.fetchone() + row = cast(Optional[Tuple[int]], txn.fetchone()) if row: return row[0] else: @@ -149,7 +186,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): end_last_seen, ) - def remove(txn): + def remove(txn: LoggingTransaction) -> None: # This works by looking at all entries in the given time span, and # then for each (user_id, access_token, ip) tuple in that range # checking for any duplicates in the rest of the table (via a join). @@ -161,10 +198,12 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): # Define the search space, which requires handling the last batch in # a different way + args: Tuple[int, ...] if last: clause = "? <= last_seen" args = (begin_last_seen,) else: + assert end_last_seen is not None clause = "? <= last_seen AND last_seen < ?" args = (begin_last_seen, end_last_seen) @@ -189,7 +228,9 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): ), args, ) - res = txn.fetchall() + res = cast( + List[Tuple[str, str, str, Optional[str], str, int, int]], txn.fetchall() + ) # We've got some duplicates for i in res: @@ -278,13 +319,15 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): return batch_size - async def _devices_last_seen_update(self, progress, batch_size): + async def _devices_last_seen_update( + self, progress: JsonDict, batch_size: int + ) -> int: """Background update to insert last seen info into devices table""" - last_user_id = progress.get("last_user_id", "") - last_device_id = progress.get("last_device_id", "") + last_user_id: str = progress.get("last_user_id", "") + last_device_id: str = progress.get("last_device_id", "") - def _devices_last_seen_update_txn(txn): + def _devices_last_seen_update_txn(txn: LoggingTransaction) -> int: # This consists of two queries: # # 1. The sub-query searches for the next N devices and joins @@ -296,6 +339,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): # we'll just end up updating the same device row multiple # times, which is fine. + where_args: List[Union[str, int]] where_clause, where_args = make_tuple_comparison_clause( [("user_id", last_user_id), ("device_id", last_device_id)], ) @@ -319,7 +363,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): } txn.execute(sql, where_args + [batch_size]) - rows = txn.fetchall() + rows = cast(List[Tuple[int, str, str, str, str]], txn.fetchall()) if not rows: return 0 @@ -350,7 +394,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.user_ips_max_age = hs.config.server.user_ips_max_age @@ -359,7 +403,7 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) @wrap_as_background_process("prune_old_user_ips") - async def _prune_old_user_ips(self): + async def _prune_old_user_ips(self) -> None: """Removes entries in user IPs older than the configured period.""" if self.user_ips_max_age is None: @@ -394,9 +438,9 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): ) """ - timestamp = self.clock.time_msec() - self.user_ips_max_age + timestamp = self._clock.time_msec() - self.user_ips_max_age - def _prune_old_user_ips_txn(txn): + def _prune_old_user_ips_txn(txn: LoggingTransaction) -> None: txn.execute(sql, (timestamp,)) await self.db_pool.runInteraction( @@ -405,7 +449,7 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): async def get_last_client_ip_by_device( self, user_id: str, device_id: Optional[str] - ) -> Dict[Tuple[str, str], dict]: + ) -> Dict[Tuple[str, str], DeviceLastConnectionInfo]: """For each device_id listed, give the user_ip it was last seen on. The result might be slightly out of date as client IPs are inserted in batches. @@ -423,26 +467,32 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): if device_id is not None: keyvalues["device_id"] = device_id - res = await self.db_pool.simple_select_list( - table="devices", - keyvalues=keyvalues, - retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + res = cast( + List[DeviceLastConnectionInfo], + await self.db_pool.simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + ), ) return {(d["user_id"], d["device_id"]): d for d in res} -class ClientIpStore(ClientIpWorkerStore): - def __init__(self, database: DatabasePool, db_conn, hs): +class ClientIpStore(ClientIpWorkerStore, MonthlyActiveUsersStore): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): - self.client_ip_last_seen = LruCache( + # (user_id, access_token, ip,) -> last_seen + self.client_ip_last_seen = LruCache[Tuple[str, str, str], int]( cache_name="client_ip_last_seen", max_size=50000 ) super().__init__(database, db_conn, hs) # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) - self._batch_row_update = {} + self._batch_row_update: Dict[ + Tuple[str, str, str], Tuple[str, Optional[str], int] + ] = {} self._client_ip_looper = self._clock.looping_call( self._update_client_ips_batch, 5 * 1000 @@ -452,8 +502,14 @@ class ClientIpStore(ClientIpWorkerStore): ) async def insert_client_ip( - self, user_id, access_token, ip, user_agent, device_id, now=None - ): + self, + user_id: str, + access_token: str, + ip: str, + user_agent: str, + device_id: Optional[str], + now: Optional[int] = None, + ) -> None: if not now: now = int(self._clock.time_msec()) key = (user_id, access_token, ip) @@ -485,7 +541,11 @@ class ClientIpStore(ClientIpWorkerStore): "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update ) - def _update_client_ips_batch_txn(self, txn, to_update): + def _update_client_ips_batch_txn( + self, + txn: LoggingTransaction, + to_update: Mapping[Tuple[str, str, str], Tuple[str, Optional[str], int]], + ) -> None: if "user_ips" in self.db_pool._unsafe_to_upsert_tables or ( not self.database_engine.can_native_upsert ): @@ -525,7 +585,7 @@ class ClientIpStore(ClientIpWorkerStore): async def get_last_client_ip_by_device( self, user_id: str, device_id: Optional[str] - ) -> Dict[Tuple[str, str], dict]: + ) -> Dict[Tuple[str, str], DeviceLastConnectionInfo]: """For each device_id listed, give the user_ip it was last seen on Args: @@ -561,12 +621,12 @@ class ClientIpStore(ClientIpWorkerStore): async def get_user_ip_and_agents( self, user: UserID, since_ts: int = 0 - ) -> List[Dict[str, Union[str, int]]]: + ) -> List[LastConnectionInfo]: """ Fetch IP/User Agent connection since a given timestamp. """ user_id = user.to_string() - results = {} + results: Dict[Tuple[str, str], Tuple[str, int]] = {} for key in self._batch_row_update: ( @@ -579,7 +639,7 @@ class ClientIpStore(ClientIpWorkerStore): if last_seen >= since_ts: results[(access_token, ip)] = (user_agent, last_seen) - def get_recent(txn): + def get_recent(txn: LoggingTransaction) -> List[Tuple[str, str, str, int]]: txn.execute( """ SELECT access_token, ip, user_agent, last_seen FROM user_ips @@ -589,7 +649,7 @@ class ClientIpStore(ClientIpWorkerStore): """, (since_ts, user_id), ) - return txn.fetchall() + return cast(List[Tuple[str, str, str, int]], txn.fetchall()) rows = await self.db_pool.runInteraction( desc="get_user_ip_and_agents", func=get_recent diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 10184d6ae7..ba9f71a230 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -906,7 +906,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas desc="get_latest_event_ids_in_room", ) - async def get_min_depth(self, room_id: str) -> int: + async def get_min_depth(self, room_id: str) -> Optional[int]: """For the given room, get the minimum depth we have seen for it.""" return await self.db_pool.runInteraction( "get_min_depth", self._get_min_depth_interaction, room_id diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 19f55c19c5..37439f8562 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2069,12 +2069,14 @@ class PersistEventsStore: state_groups[event.event_id] = context.state_group - self.db_pool.simple_insert_many_txn( + self.db_pool.simple_upsert_many_txn( txn, table="event_to_state_groups", - values=[ - {"state_group": state_group_id, "event_id": event_id} - for event_id, state_group_id in state_groups.items() + key_names=["event_id"], + key_values=[[event_id] for event_id, _ in state_groups.items()], + value_names=["state_group"], + value_values=[ + [state_group_id] for _, state_group_id in state_groups.items() ], ) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 4a1a2f4a6a..ae37901be9 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -55,8 +55,9 @@ from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.engines import PostgresEngine +from synapse.storage.types import Connection from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.storage.util.sequence import build_sequence_generator from synapse.types import JsonDict, get_domain_from_id @@ -86,6 +87,47 @@ class _EventCacheEntry: redacted_event: Optional[EventBase] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _EventRow: + """ + An event, as pulled from the database. + + Properties: + event_id: The event ID of the event. + + stream_ordering: stream ordering for this event + + json: json-encoded event structure + + internal_metadata: json-encoded internal metadata dict + + format_version: The format of the event. Hopefully one of EventFormatVersions. + 'None' means the event predates EventFormatVersions (so the event is format V1). + + room_version_id: The version of the room which contains the event. Hopefully + one of RoomVersions. + + Due to historical reasons, there may be a few events in the database which + do not have an associated room; in this case None will be returned here. + + rejected_reason: if the event was rejected, the reason why. + + redactions: a list of event-ids which (claim to) redact this event. + + outlier: True if this event is an outlier. + """ + + event_id: str + stream_ordering: int + json: str + internal_metadata: str + format_version: Optional[int] + room_version_id: Optional[int] + rejected_reason: Optional[str] + redactions: List[str] + outlier: bool + + class EventRedactBehaviour(Names): """ What to do when retrieving a redacted event from the database. @@ -686,7 +728,7 @@ class EventsWorkerStore(SQLBaseStore): for e in state_to_include.values() ] - def _do_fetch(self, conn): + def _do_fetch(self, conn: Connection) -> None: """Takes a database connection and waits for requests for events from the _event_fetch_list queue. """ @@ -713,13 +755,15 @@ class EventsWorkerStore(SQLBaseStore): self._fetch_event_list(conn, event_list) - def _fetch_event_list(self, conn, event_list): + def _fetch_event_list( + self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]] + ) -> None: """Handle a load of requests from the _event_fetch_list queue Args: - conn (twisted.enterprise.adbapi.Connection): database connection + conn: database connection - event_list (list[Tuple[list[str], Deferred]]): + event_list: The fetch requests. Each entry consists of a list of event ids to be fetched, and a deferred to be completed once the events have been fetched. @@ -788,7 +832,7 @@ class EventsWorkerStore(SQLBaseStore): row = row_map.get(event_id) fetched_events[event_id] = row if row: - redaction_ids.update(row["redactions"]) + redaction_ids.update(row.redactions) events_to_fetch = redaction_ids.difference(fetched_events.keys()) if events_to_fetch: @@ -799,32 +843,32 @@ class EventsWorkerStore(SQLBaseStore): for event_id, row in fetched_events.items(): if not row: continue - assert row["event_id"] == event_id + assert row.event_id == event_id - rejected_reason = row["rejected_reason"] + rejected_reason = row.rejected_reason # If the event or metadata cannot be parsed, log the error and act # as if the event is unknown. try: - d = db_to_json(row["json"]) + d = db_to_json(row.json) except ValueError: logger.error("Unable to parse json from event: %s", event_id) continue try: - internal_metadata = db_to_json(row["internal_metadata"]) + internal_metadata = db_to_json(row.internal_metadata) except ValueError: logger.error( "Unable to parse internal_metadata from event: %s", event_id ) continue - format_version = row["format_version"] + format_version = row.format_version if format_version is None: # This means that we stored the event before we had the concept # of a event format version, so it must be a V1 event. format_version = EventFormatVersions.V1 - room_version_id = row["room_version_id"] + room_version_id = row.room_version_id if not room_version_id: # this should only happen for out-of-band membership events which @@ -889,8 +933,8 @@ class EventsWorkerStore(SQLBaseStore): internal_metadata_dict=internal_metadata, rejected_reason=rejected_reason, ) - original_ev.internal_metadata.stream_ordering = row["stream_ordering"] - original_ev.internal_metadata.outlier = row["outlier"] + original_ev.internal_metadata.stream_ordering = row.stream_ordering + original_ev.internal_metadata.outlier = row.outlier event_map[event_id] = original_ev @@ -898,7 +942,7 @@ class EventsWorkerStore(SQLBaseStore): # the cache entries. result_map = {} for event_id, original_ev in event_map.items(): - redactions = fetched_events[event_id]["redactions"] + redactions = fetched_events[event_id].redactions redacted_event = self._maybe_redact_event_row( original_ev, redactions, event_map ) @@ -912,17 +956,17 @@ class EventsWorkerStore(SQLBaseStore): return result_map - async def _enqueue_events(self, events): + async def _enqueue_events(self, events: Iterable[str]) -> Dict[str, _EventRow]: """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. Args: - events (Iterable[str]): events to be fetched. + events: events to be fetched. Returns: - Dict[str, Dict]: map from event id to row data from the database. - May contain events that weren't requested. + A map from event id to row data from the database. May contain events + that weren't requested. """ events_d = defer.Deferred() @@ -949,43 +993,19 @@ class EventsWorkerStore(SQLBaseStore): return row_map - def _fetch_event_rows(self, txn, event_ids): + def _fetch_event_rows( + self, txn: LoggingTransaction, event_ids: Iterable[str] + ) -> Dict[str, _EventRow]: """Fetch event rows from the database Events which are not found are omitted from the result. - The returned per-event dicts contain the following keys: - - * event_id (str) - - * stream_ordering (int): stream ordering for this event - - * json (str): json-encoded event structure - - * internal_metadata (str): json-encoded internal metadata dict - - * format_version (int|None): The format of the event. Hopefully one - of EventFormatVersions. 'None' means the event predates - EventFormatVersions (so the event is format V1). - - * room_version_id (str|None): The version of the room which contains the event. - Hopefully one of RoomVersions. - - Due to historical reasons, there may be a few events in the database which - do not have an associated room; in this case None will be returned here. - - * rejected_reason (str|None): if the event was rejected, the reason - why. - - * redactions (List[str]): a list of event-ids which (claim to) redact - this event. - Args: - txn (twisted.enterprise.adbapi.Connection): - event_ids (Iterable[str]): event IDs to fetch + txn: The database transaction. + event_ids: event IDs to fetch Returns: - Dict[str, Dict]: a map from event id to event info. + A map from event id to event info. """ event_dict = {} for evs in batch_iter(event_ids, 200): @@ -1013,17 +1033,17 @@ class EventsWorkerStore(SQLBaseStore): for row in txn: event_id = row[0] - event_dict[event_id] = { - "event_id": event_id, - "stream_ordering": row[1], - "internal_metadata": row[2], - "json": row[3], - "format_version": row[4], - "room_version_id": row[5], - "rejected_reason": row[6], - "redactions": [], - "outlier": row[7], - } + event_dict[event_id] = _EventRow( + event_id=event_id, + stream_ordering=row[1], + internal_metadata=row[2], + json=row[3], + format_version=row[4], + room_version_id=row[5], + rejected_reason=row[6], + redactions=[], + outlier=row[7], + ) # check for redactions redactions_sql = "SELECT event_id, redacts FROM redactions WHERE " @@ -1035,7 +1055,7 @@ class EventsWorkerStore(SQLBaseStore): for (redacter, redacted) in txn: d = event_dict.get(redacted) if d: - d["redactions"].append(redacter) + d.redactions.append(redacter) return event_dict diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 181841ee06..0ab56d8a07 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -2237,7 +2237,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): # accident. row = {"client_secret": None, "validated_at": None} else: - raise ThreepidValidationError(400, "Unknown session_id") + raise ThreepidValidationError("Unknown session_id") retrieved_client_secret = row["client_secret"] validated_at = row["validated_at"] @@ -2252,14 +2252,14 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): if not row: raise ThreepidValidationError( - 400, "Validation token not found or has expired" + "Validation token not found or has expired" ) expires = row["expires"] next_link = row["next_link"] if retrieved_client_secret != client_secret: raise ThreepidValidationError( - 400, "This client_secret does not match the provided session_id" + "This client_secret does not match the provided session_id" ) # If the session is already validated, no need to revalidate @@ -2268,7 +2268,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): if expires <= current_ts: raise ThreepidValidationError( - 400, "This token has expired. Please request a new one" + "This token has expired. Please request a new one" ) # Looks good. Validate the session diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index d69eaf80ce..835d7889cb 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -679,8 +679,8 @@ class RoomWorkerStore(SQLBaseStore): # policy. if not ret: return { - "min_lifetime": self.config.server.retention_default_min_lifetime, - "max_lifetime": self.config.server.retention_default_max_lifetime, + "min_lifetime": self.config.retention.retention_default_min_lifetime, + "max_lifetime": self.config.retention.retention_default_max_lifetime, } row = ret[0] @@ -690,10 +690,10 @@ class RoomWorkerStore(SQLBaseStore): # The default values will be None if no default policy has been defined, or if one # of the attributes is missing from the default policy. if row["min_lifetime"] is None: - row["min_lifetime"] = self.config.server.retention_default_min_lifetime + row["min_lifetime"] = self.config.retention.retention_default_min_lifetime if row["max_lifetime"] is None: - row["max_lifetime"] = self.config.server.retention_default_max_lifetime + row["max_lifetime"] = self.config.retention.retention_default_max_lifetime return row diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 300a563c9e..dcbce8fdcf 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -36,3 +36,16 @@ class RoomBatchStore(SQLBaseStore): retcol="event_id", allow_none=True, ) + + async def store_state_group_id_for_event_id( + self, event_id: str, state_group_id: int + ) -> Optional[str]: + { + await self.db_pool.simple_upsert( + table="event_to_state_groups", + keyvalues={"event_id": event_id}, + values={"state_group": state_group_id, "event_id": event_id}, + # Unique constraint on event_id so we don't have to lock + lock=False, + ) + } diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 11ca47ea28..1629d2a53c 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -549,6 +549,8 @@ def _apply_module_schemas( database_engine: config: application config """ + # This is the old way for password_auth_provider modules to make changes + # to the database. This should instead be done using the module API for (mod, _config) in config.authproviders.password_providers: if not hasattr(mod, "get_db_schema_files"): continue diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 1aee741a8b..a1d2332326 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 64 # remember to update the list below when updating +SCHEMA_VERSION = 65 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -41,6 +41,10 @@ Changes in SCHEMA_VERSION = 63: Changes in SCHEMA_VERSION = 64: - MSC2716: Rename related tables and columns from "chunks" to "batches". + +Changes in SCHEMA_VERSION = 65: + - MSC2716: Remove unique event_id constraint from insertion_event_edges + because an insertion event can have multiple edges. """ diff --git a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql new file mode 100644 index 0000000000..98b25daf45 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql @@ -0,0 +1,19 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Recreate the insertion_event_edges event_id index without the unique constraint +-- because an insertion event can have multiple edges. +DROP INDEX insertion_event_edges_event_id; +CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id); diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 936ebf3dde..e1557566e4 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -23,6 +23,7 @@ from synapse.federation.federation_base import event_from_pdu_json from synapse.logging.context import LoggingContext, run_in_background from synapse.rest import admin from synapse.rest.client import login, room +from synapse.types import create_requester from synapse.util.stringutils import random_string from tests import unittest @@ -30,6 +31,10 @@ from tests import unittest logger = logging.getLogger(__name__) +def generate_fake_event_id() -> str: + return "$fake_" + random_string(43) + + class FederationTestCase(unittest.HomeserverTestCase): servlets = [ admin.register_servlets, @@ -198,6 +203,65 @@ class FederationTestCase(unittest.HomeserverTestCase): self.assertEqual(sg, sg2) + def test_backfill_with_many_backward_extremities(self): + """ + Check that we can backfill with many backward extremities. + The goal is to make sure that when we only use a portion + of backwards extremities(the magic number is more than 5), + no errors are thrown. + + Regression test, see #11027 + """ + # create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + requester = create_requester(user_id) + + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + ev1 = self.helper.send(room_id, "first message", tok=tok) + + # Create "many" backward extremities. The magic number we're trying to + # create more than is 5 which corresponds to the number of backward + # extremities we slice off in `_maybe_backfill_inner` + for _ in range(0, 8): + event_handler = self.hs.get_event_creation_handler() + event, context = self.get_success( + event_handler.create_event( + requester, + { + "type": "m.room.message", + "content": { + "msgtype": "m.text", + "body": "message connected to fake event", + }, + "room_id": room_id, + "sender": user_id, + }, + prev_event_ids=[ + ev1["event_id"], + # We're creating an backward extremity each time thanks + # to this fake event + generate_fake_event_id(), + ], + ) + ) + self.get_success( + event_handler.handle_new_client_event(requester, event, context) + ) + + current_depth = 1 + limit = 100 + with LoggingContext("receive_pdu"): + # Make sure backfill still works + d = run_in_background( + self.hs.get_federation_handler().maybe_backfill, + room_id, + current_depth, + limit, + ) + self.get_success(d) + def test_backfill_floating_outlier_membership_auth(self): """ As the local homeserver, check that we can properly process a federated diff --git a/tests/handlers/test_password_providers.py b/tests/handlers/test_password_providers.py index 38e6d9f536..7dd4a5a367 100644 --- a/tests/handlers/test_password_providers.py +++ b/tests/handlers/test_password_providers.py @@ -20,6 +20,8 @@ from unittest.mock import Mock from twisted.internet import defer import synapse +from synapse.handlers.auth import load_legacy_password_auth_providers +from synapse.module_api import ModuleApi from synapse.rest.client import devices, login from synapse.types import JsonDict @@ -36,8 +38,8 @@ ADDITIONAL_LOGIN_FLOWS = [{"type": "uk.half-shot.msc2778.login.application_servi mock_password_provider = Mock() -class PasswordOnlyAuthProvider: - """A password_provider which only implements `check_password`.""" +class LegacyPasswordOnlyAuthProvider: + """A legacy password_provider which only implements `check_password`.""" @staticmethod def parse_config(self): @@ -50,8 +52,8 @@ class PasswordOnlyAuthProvider: return mock_password_provider.check_password(*args) -class CustomAuthProvider: - """A password_provider which implements a custom login type.""" +class LegacyCustomAuthProvider: + """A legacy password_provider which implements a custom login type.""" @staticmethod def parse_config(self): @@ -67,7 +69,23 @@ class CustomAuthProvider: return mock_password_provider.check_auth(*args) -class PasswordCustomAuthProvider: +class CustomAuthProvider: + """A module which registers password_auth_provider callbacks for a custom login type.""" + + @staticmethod + def parse_config(self): + pass + + def __init__(self, config, api: ModuleApi): + api.register_password_auth_provider_callbacks( + auth_checkers={("test.login_type", ("test_field",)): self.check_auth}, + ) + + def check_auth(self, *args): + return mock_password_provider.check_auth(*args) + + +class LegacyPasswordCustomAuthProvider: """A password_provider which implements password login via `check_auth`, as well as a custom type.""" @@ -85,8 +103,32 @@ class PasswordCustomAuthProvider: return mock_password_provider.check_auth(*args) -def providers_config(*providers: Type[Any]) -> dict: - """Returns a config dict that will enable the given password auth providers""" +class PasswordCustomAuthProvider: + """A module which registers password_auth_provider callbacks for a custom login type. + as well as a password login""" + + @staticmethod + def parse_config(self): + pass + + def __init__(self, config, api: ModuleApi): + api.register_password_auth_provider_callbacks( + auth_checkers={ + ("test.login_type", ("test_field",)): self.check_auth, + ("m.login.password", ("password",)): self.check_auth, + }, + ) + pass + + def check_auth(self, *args): + return mock_password_provider.check_auth(*args) + + def check_pass(self, *args): + return mock_password_provider.check_password(*args) + + +def legacy_providers_config(*providers: Type[Any]) -> dict: + """Returns a config dict that will enable the given legacy password auth providers""" return { "password_providers": [ {"module": "%s.%s" % (__name__, provider.__qualname__), "config": {}} @@ -95,6 +137,16 @@ def providers_config(*providers: Type[Any]) -> dict: } +def providers_config(*providers: Type[Any]) -> dict: + """Returns a config dict that will enable the given modules""" + return { + "modules": [ + {"module": "%s.%s" % (__name__, provider.__qualname__), "config": {}} + for provider in providers + ] + } + + class PasswordAuthProviderTests(unittest.HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets, @@ -107,8 +159,21 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): mock_password_provider.reset_mock() super().setUp() - @override_config(providers_config(PasswordOnlyAuthProvider)) - def test_password_only_auth_provider_login(self): + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver() + # Load the modules into the homeserver + module_api = hs.get_module_api() + for module, config in hs.config.modules.loaded_modules: + module(config=config, api=module_api) + load_legacy_password_auth_providers(hs) + + return hs + + @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider)) + def test_password_only_auth_progiver_login_legacy(self): + self.password_only_auth_provider_login_test_body() + + def password_only_auth_provider_login_test_body(self): # login flows should only have m.login.password flows = self._get_login_flows() self.assertEqual(flows, [{"type": "m.login.password"}] + ADDITIONAL_LOGIN_FLOWS) @@ -138,8 +203,11 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): "@ USER🙂NAME :test", " pASS😢word " ) - @override_config(providers_config(PasswordOnlyAuthProvider)) - def test_password_only_auth_provider_ui_auth(self): + @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider)) + def test_password_only_auth_provider_ui_auth_legacy(self): + self.password_only_auth_provider_ui_auth_test_body() + + def password_only_auth_provider_ui_auth_test_body(self): """UI Auth should delegate correctly to the password provider""" # create the user, otherwise access doesn't work @@ -172,8 +240,11 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200) mock_password_provider.check_password.assert_called_once_with("@u:test", "p") - @override_config(providers_config(PasswordOnlyAuthProvider)) - def test_local_user_fallback_login(self): + @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider)) + def test_local_user_fallback_login_legacy(self): + self.local_user_fallback_login_test_body() + + def local_user_fallback_login_test_body(self): """rejected login should fall back to local db""" self.register_user("localuser", "localpass") @@ -186,8 +257,11 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200, channel.result) self.assertEqual("@localuser:test", channel.json_body["user_id"]) - @override_config(providers_config(PasswordOnlyAuthProvider)) - def test_local_user_fallback_ui_auth(self): + @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider)) + def test_local_user_fallback_ui_auth_legacy(self): + self.local_user_fallback_ui_auth_test_body() + + def local_user_fallback_ui_auth_test_body(self): """rejected login should fall back to local db""" self.register_user("localuser", "localpass") @@ -223,11 +297,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): @override_config( { - **providers_config(PasswordOnlyAuthProvider), + **legacy_providers_config(LegacyPasswordOnlyAuthProvider), "password_config": {"localdb_enabled": False}, } ) - def test_no_local_user_fallback_login(self): + def test_no_local_user_fallback_login_legacy(self): + self.no_local_user_fallback_login_test_body() + + def no_local_user_fallback_login_test_body(self): """localdb_enabled can block login with the local password""" self.register_user("localuser", "localpass") @@ -242,11 +319,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): @override_config( { - **providers_config(PasswordOnlyAuthProvider), + **legacy_providers_config(LegacyPasswordOnlyAuthProvider), "password_config": {"localdb_enabled": False}, } ) - def test_no_local_user_fallback_ui_auth(self): + def test_no_local_user_fallback_ui_auth_legacy(self): + self.no_local_user_fallback_ui_auth_test_body() + + def no_local_user_fallback_ui_auth_test_body(self): """localdb_enabled can block ui auth with the local password""" self.register_user("localuser", "localpass") @@ -280,11 +360,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): @override_config( { - **providers_config(PasswordOnlyAuthProvider), + **legacy_providers_config(LegacyPasswordOnlyAuthProvider), "password_config": {"enabled": False}, } ) - def test_password_auth_disabled(self): + def test_password_auth_disabled_legacy(self): + self.password_auth_disabled_test_body() + + def password_auth_disabled_test_body(self): """password auth doesn't work if it's disabled across the board""" # login flows should be empty flows = self._get_login_flows() @@ -295,8 +378,15 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 400, channel.result) mock_password_provider.check_password.assert_not_called() + @override_config(legacy_providers_config(LegacyCustomAuthProvider)) + def test_custom_auth_provider_login_legacy(self): + self.custom_auth_provider_login_test_body() + @override_config(providers_config(CustomAuthProvider)) def test_custom_auth_provider_login(self): + self.custom_auth_provider_login_test_body() + + def custom_auth_provider_login_test_body(self): # login flows should have the custom flow and m.login.password, since we # haven't disabled local password lookup. # (password must come first, because reasons) @@ -312,7 +402,9 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 400, channel.result) mock_password_provider.check_auth.assert_not_called() - mock_password_provider.check_auth.return_value = defer.succeed("@user:bz") + mock_password_provider.check_auth.return_value = defer.succeed( + ("@user:bz", None) + ) channel = self._send_login("test.login_type", "u", test_field="y") self.assertEqual(channel.code, 200, channel.result) self.assertEqual("@user:bz", channel.json_body["user_id"]) @@ -325,7 +417,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # in these cases, but at least we can guard against the API changing # unexpectedly mock_password_provider.check_auth.return_value = defer.succeed( - "@ MALFORMED! :bz" + ("@ MALFORMED! :bz", None) ) channel = self._send_login("test.login_type", " USER🙂NAME ", test_field=" abc ") self.assertEqual(channel.code, 200, channel.result) @@ -334,8 +426,15 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): " USER🙂NAME ", "test.login_type", {"test_field": " abc "} ) + @override_config(legacy_providers_config(LegacyCustomAuthProvider)) + def test_custom_auth_provider_ui_auth_legacy(self): + self.custom_auth_provider_ui_auth_test_body() + @override_config(providers_config(CustomAuthProvider)) def test_custom_auth_provider_ui_auth(self): + self.custom_auth_provider_ui_auth_test_body() + + def custom_auth_provider_ui_auth_test_body(self): # register the user and log in twice, to get two devices self.register_user("localuser", "localpass") tok1 = self.login("localuser", "localpass") @@ -367,7 +466,9 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): mock_password_provider.reset_mock() # right params, but authing as the wrong user - mock_password_provider.check_auth.return_value = defer.succeed("@user:bz") + mock_password_provider.check_auth.return_value = defer.succeed( + ("@user:bz", None) + ) body["auth"]["test_field"] = "foo" channel = self._delete_device(tok1, "dev2", body) self.assertEqual(channel.code, 403) @@ -379,7 +480,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # and finally, succeed mock_password_provider.check_auth.return_value = defer.succeed( - "@localuser:test" + ("@localuser:test", None) ) channel = self._delete_device(tok1, "dev2", body) self.assertEqual(channel.code, 200) @@ -387,8 +488,15 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): "localuser", "test.login_type", {"test_field": "foo"} ) + @override_config(legacy_providers_config(LegacyCustomAuthProvider)) + def test_custom_auth_provider_callback_legacy(self): + self.custom_auth_provider_callback_test_body() + @override_config(providers_config(CustomAuthProvider)) def test_custom_auth_provider_callback(self): + self.custom_auth_provider_callback_test_body() + + def custom_auth_provider_callback_test_body(self): callback = Mock(return_value=defer.succeed(None)) mock_password_provider.check_auth.return_value = defer.succeed( @@ -411,9 +519,21 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): self.assertIn(p, call_args[0]) @override_config( + { + **legacy_providers_config(LegacyCustomAuthProvider), + "password_config": {"enabled": False}, + } + ) + def test_custom_auth_password_disabled_legacy(self): + self.custom_auth_password_disabled_test_body() + + @override_config( {**providers_config(CustomAuthProvider), "password_config": {"enabled": False}} ) def test_custom_auth_password_disabled(self): + self.custom_auth_password_disabled_test_body() + + def custom_auth_password_disabled_test_body(self): """Test login with a custom auth provider where password login is disabled""" self.register_user("localuser", "localpass") @@ -427,11 +547,23 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): @override_config( { + **legacy_providers_config(LegacyCustomAuthProvider), + "password_config": {"enabled": False, "localdb_enabled": False}, + } + ) + def test_custom_auth_password_disabled_localdb_enabled_legacy(self): + self.custom_auth_password_disabled_localdb_enabled_test_body() + + @override_config( + { **providers_config(CustomAuthProvider), "password_config": {"enabled": False, "localdb_enabled": False}, } ) def test_custom_auth_password_disabled_localdb_enabled(self): + self.custom_auth_password_disabled_localdb_enabled_test_body() + + def custom_auth_password_disabled_localdb_enabled_test_body(self): """Check the localdb_enabled == enabled == False Regression test for https://github.com/matrix-org/synapse/issues/8914: check @@ -450,11 +582,23 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): @override_config( { + **legacy_providers_config(LegacyPasswordCustomAuthProvider), + "password_config": {"enabled": False}, + } + ) + def test_password_custom_auth_password_disabled_login_legacy(self): + self.password_custom_auth_password_disabled_login_test_body() + + @override_config( + { **providers_config(PasswordCustomAuthProvider), "password_config": {"enabled": False}, } ) def test_password_custom_auth_password_disabled_login(self): + self.password_custom_auth_password_disabled_login_test_body() + + def password_custom_auth_password_disabled_login_test_body(self): """log in with a custom auth provider which implements password, but password login is disabled""" self.register_user("localuser", "localpass") @@ -466,6 +610,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): channel = self._send_password_login("localuser", "localpass") self.assertEqual(channel.code, 400, channel.result) mock_password_provider.check_auth.assert_not_called() + mock_password_provider.check_password.assert_not_called() + + @override_config( + { + **legacy_providers_config(LegacyPasswordCustomAuthProvider), + "password_config": {"enabled": False}, + } + ) + def test_password_custom_auth_password_disabled_ui_auth_legacy(self): + self.password_custom_auth_password_disabled_ui_auth_test_body() @override_config( { @@ -474,12 +628,15 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): } ) def test_password_custom_auth_password_disabled_ui_auth(self): + self.password_custom_auth_password_disabled_ui_auth_test_body() + + def password_custom_auth_password_disabled_ui_auth_test_body(self): """UI Auth with a custom auth provider which implements password, but password login is disabled""" # register the user and log in twice via the test login type to get two devices, self.register_user("localuser", "localpass") mock_password_provider.check_auth.return_value = defer.succeed( - "@localuser:test" + ("@localuser:test", None) ) channel = self._send_login("test.login_type", "localuser", test_field="") self.assertEqual(channel.code, 200, channel.result) @@ -516,6 +673,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): "Password login has been disabled.", channel.json_body["error"] ) mock_password_provider.check_auth.assert_not_called() + mock_password_provider.check_password.assert_not_called() mock_password_provider.reset_mock() # successful auth @@ -526,6 +684,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): mock_password_provider.check_auth.assert_called_once_with( "localuser", "test.login_type", {"test_field": "x"} ) + mock_password_provider.check_password.assert_not_called() + + @override_config( + { + **legacy_providers_config(LegacyCustomAuthProvider), + "password_config": {"localdb_enabled": False}, + } + ) + def test_custom_auth_no_local_user_fallback_legacy(self): + self.custom_auth_no_local_user_fallback_test_body() @override_config( { @@ -534,6 +702,9 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): } ) def test_custom_auth_no_local_user_fallback(self): + self.custom_auth_no_local_user_fallback_test_body() + + def custom_auth_no_local_user_fallback_test_body(self): """Test login with a custom auth provider where the local db is disabled""" self.register_user("localuser", "localpass") diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 0120b4688b..b9ad92b977 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -109,18 +109,14 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): tok=alice_token, ) - users = self.get_success(self.user_dir_helper.get_users_in_user_directory()) - in_public = self.get_success(self.user_dir_helper.get_users_in_public_rooms()) - in_private = self.get_success( - self.user_dir_helper.get_users_who_share_private_rooms() + # The user directory should reflect the room memberships above. + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() ) - self.assertEqual(users, {alice, bob}) + self.assertEqual(in_public, {(alice, public), (bob, public), (alice, public2)}) self.assertEqual( - set(in_public), {(alice, public), (bob, public), (alice, public2)} - ) - self.assertEqual( - self.user_dir_helper._compress_shared(in_private), + in_private, {(alice, bob, private), (bob, alice, private)}, ) @@ -209,6 +205,88 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): in_public = self.get_success(self.user_dir_helper.get_users_in_public_rooms()) self.assertEqual(set(in_public), {(user1, room), (user2, room)}) + def test_excludes_users_when_making_room_public(self) -> None: + # Create a regular user and a support user. + alice = self.register_user("alice", "pass") + alice_token = self.login(alice, "pass") + support = "@support1:test" + self.get_success( + self.store.register_user( + user_id=support, password_hash=None, user_type=UserTypes.SUPPORT + ) + ) + + # Make a public and private room containing Alice and the support user + public, initially_private = self._create_rooms_and_inject_memberships( + alice, alice_token, support + ) + self._check_only_one_user_in_directory(alice, public) + + # Alice makes the private room public. + self.helper.send_state( + initially_private, + "m.room.join_rules", + {"join_rule": "public"}, + tok=alice_token, + ) + + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) + self.assertEqual(users, {alice}) + self.assertEqual(in_public, {(alice, public), (alice, initially_private)}) + self.assertEqual(in_private, set()) + + def test_switching_from_private_to_public_to_private(self) -> None: + """Check we update the room sharing tables when switching a room + from private to public, then back again to private.""" + # Alice and Bob share a private room. + alice = self.register_user("alice", "pass") + alice_token = self.login(alice, "pass") + bob = self.register_user("bob", "pass") + bob_token = self.login(bob, "pass") + room = self.helper.create_room_as(alice, is_public=False, tok=alice_token) + self.helper.invite(room, alice, bob, tok=alice_token) + self.helper.join(room, bob, tok=bob_token) + + # The user directory should reflect this. + def check_user_dir_for_private_room() -> None: + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) + self.assertEqual(users, {alice, bob}) + self.assertEqual(in_public, set()) + self.assertEqual(in_private, {(alice, bob, room), (bob, alice, room)}) + + check_user_dir_for_private_room() + + # Alice makes the room public. + self.helper.send_state( + room, + "m.room.join_rules", + {"join_rule": "public"}, + tok=alice_token, + ) + + # The user directory should be updated accordingly + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) + self.assertEqual(users, {alice, bob}) + self.assertEqual(in_public, {(alice, room), (bob, room)}) + self.assertEqual(in_private, set()) + + # Alice makes the room private. + self.helper.send_state( + room, + "m.room.join_rules", + {"join_rule": "invite"}, + tok=alice_token, + ) + + # The user directory should be updated accordingly + check_user_dir_for_private_room() + def _create_rooms_and_inject_memberships( self, creator: str, token: str, joiner: str ) -> Tuple[str, str]: @@ -232,15 +310,18 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): return public_room, private_room def _check_only_one_user_in_directory(self, user: str, public: str) -> None: - users = self.get_success(self.user_dir_helper.get_users_in_user_directory()) - in_public = self.get_success(self.user_dir_helper.get_users_in_public_rooms()) - in_private = self.get_success( - self.user_dir_helper.get_users_who_share_private_rooms() - ) + """Check that the user directory DB tables show that: + - only one user is in the user directory + - they belong to exactly one public room + - they don't share a private room with anyone. + """ + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) self.assertEqual(users, {user}) - self.assertEqual(set(in_public), {(user, public)}) - self.assertEqual(in_private, []) + self.assertEqual(in_public, {(user, public)}) + self.assertEqual(in_private, set()) def test_handle_local_profile_change_with_support_user(self) -> None: support_user_id = "@support:test" @@ -581,11 +662,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.user_dir_helper.get_users_in_public_rooms() ) - self.assertEqual( - self.user_dir_helper._compress_shared(shares_private), - {(u1, u2, room), (u2, u1, room)}, - ) - self.assertEqual(public_users, []) + self.assertEqual(shares_private, {(u1, u2, room), (u2, u1, room)}) + self.assertEqual(public_users, set()) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -610,8 +688,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.user_dir_helper.get_users_in_public_rooms() ) - self.assertEqual(self.user_dir_helper._compress_shared(shares_private), set()) - self.assertEqual(public_users, []) + self.assertEqual(shares_private, set()) + self.assertEqual(public_users, set()) # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -645,11 +723,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.user_dir_helper.get_users_in_public_rooms() ) - self.assertEqual( - self.user_dir_helper._compress_shared(shares_private), - {(u1, u2, room), (u2, u1, room)}, - ) - self.assertEqual(public_users, []) + self.assertEqual(shares_private, {(u1, u2, room), (u2, u1, room)}) + self.assertEqual(public_users, set()) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -704,11 +779,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.user_dir_helper.get_users_in_public_rooms() ) - self.assertEqual( - self.user_dir_helper._compress_shared(shares_private), - {(u1, u2, room), (u2, u1, room)}, - ) - self.assertEqual(public_users, []) + self.assertEqual(shares_private, {(u1, u2, room), (u2, u1, room)}) + self.assertEqual(public_users, set()) # Configure a spam checker. spam_checker = self.hs.get_spam_checker() @@ -740,8 +812,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): ) # No users share rooms - self.assertEqual(public_users, []) - self.assertEqual(self.user_dir_helper._compress_shared(shares_private), set()) + self.assertEqual(public_users, set()) + self.assertEqual(shares_private, set()) # Despite not sharing a room, search_all_users means we get a search # result. @@ -842,6 +914,56 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.hs.get_storage().persistence.persist_event(event, context) ) + def test_local_user_leaving_room_remains_in_user_directory(self) -> None: + """We've chosen to simplify the user directory's implementation by + always including local users. Ensure this invariant is maintained when + a local user + - leaves a room, and + - leaves the last room they're in which is visible to this server. + + This is user-visible if the "search_all_users" config option is on: the + local user who left a room would no longer be searchable if this test fails! + """ + alice = self.register_user("alice", "pass") + alice_token = self.login(alice, "pass") + bob = self.register_user("bob", "pass") + bob_token = self.login(bob, "pass") + + # Alice makes two public rooms, which Bob joins. + room1 = self.helper.create_room_as(alice, is_public=True, tok=alice_token) + room2 = self.helper.create_room_as(alice, is_public=True, tok=alice_token) + self.helper.join(room1, bob, tok=bob_token) + self.helper.join(room2, bob, tok=bob_token) + + # The user directory tables are updated. + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) + self.assertEqual(users, {alice, bob}) + self.assertEqual( + in_public, {(alice, room1), (alice, room2), (bob, room1), (bob, room2)} + ) + self.assertEqual(in_private, set()) + + # Alice leaves one room. She should still be in the directory. + self.helper.leave(room1, alice, tok=alice_token) + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) + self.assertEqual(users, {alice, bob}) + self.assertEqual(in_public, {(alice, room2), (bob, room1), (bob, room2)}) + self.assertEqual(in_private, set()) + + # Alice leaves the other. She should still be in the directory. + self.helper.leave(room2, alice, tok=alice_token) + self.wait_for_background_updates() + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() + ) + self.assertEqual(users, {alice, bob}) + self.assertEqual(in_public, {(bob, room1), (bob, room2)}) + self.assertEqual(in_private, set()) + class TestUserDirSearchDisabled(unittest.HomeserverTestCase): servlets = [ diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 6ed9e42173..c9e2754b09 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -14,14 +14,13 @@ import hashlib import hmac -import json import os import urllib.parse from binascii import unhexlify from typing import List, Optional from unittest.mock import Mock, patch -from parameterized import parameterized +from parameterized import parameterized, parameterized_class import synapse.rest.admin from synapse.api.constants import UserTypes @@ -104,8 +103,8 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): # 59 seconds self.reactor.advance(59) - body = json.dumps({"nonce": nonce}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("username must be specified", channel.json_body["error"]) @@ -113,7 +112,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): # 61 seconds self.reactor.advance(2) - channel = self.make_request("POST", self.url, body.encode("utf8")) + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("unrecognised nonce", channel.json_body["error"]) @@ -129,18 +128,16 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): want_mac.update(b"notthenonce\x00bob\x00abc123\x00admin") want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob", - "password": "abc123", - "admin": True, - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual("HMAC incorrect", channel.json_body["error"]) def test_register_correct_nonce(self): @@ -157,17 +154,15 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): ) want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob", - "password": "abc123", - "admin": True, - "user_type": UserTypes.SUPPORT, - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "user_type": UserTypes.SUPPORT, + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["user_id"]) @@ -183,22 +178,20 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): want_mac.update(nonce.encode("ascii") + b"\x00bob\x00abc123\x00admin") want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob", - "password": "abc123", - "admin": True, - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["user_id"]) # Now, try and reuse it - channel = self.make_request("POST", self.url, body.encode("utf8")) + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("unrecognised nonce", channel.json_body["error"]) @@ -218,9 +211,8 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): # Nonce check # - # Must be present - body = json.dumps({}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + # Must be an empty body present + channel = self.make_request("POST", self.url, {}) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("nonce must be specified", channel.json_body["error"]) @@ -230,29 +222,28 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): # # Must be present - body = json.dumps({"nonce": nonce()}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + channel = self.make_request("POST", self.url, {"nonce": nonce()}) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("username must be specified", channel.json_body["error"]) # Must be a string - body = json.dumps({"nonce": nonce(), "username": 1234}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": 1234} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid username", channel.json_body["error"]) # Must not have null bytes - body = json.dumps({"nonce": nonce(), "username": "abcd\u0000"}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": "abcd\u0000"} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid username", channel.json_body["error"]) # Must not have null bytes - body = json.dumps({"nonce": nonce(), "username": "a" * 1000}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": "a" * 1000} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid username", channel.json_body["error"]) @@ -262,29 +253,29 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): # # Must be present - body = json.dumps({"nonce": nonce(), "username": "a"}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": "a"} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("password must be specified", channel.json_body["error"]) # Must be a string - body = json.dumps({"nonce": nonce(), "username": "a", "password": 1234}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": "a", "password": 1234} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid password", channel.json_body["error"]) # Must not have null bytes - body = json.dumps({"nonce": nonce(), "username": "a", "password": "abcd\u0000"}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": "a", "password": "abcd\u0000"} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid password", channel.json_body["error"]) # Super long - body = json.dumps({"nonce": nonce(), "username": "a", "password": "A" * 1000}) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = {"nonce": nonce(), "username": "a", "password": "A" * 1000} + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid password", channel.json_body["error"]) @@ -294,15 +285,13 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): # # Invalid user_type - body = json.dumps( - { - "nonce": nonce(), - "username": "a", - "password": "1234", - "user_type": "invalid", - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce(), + "username": "a", + "password": "1234", + "user_type": "invalid", + } + channel = self.make_request("POST", self.url, body) self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual("Invalid user type", channel.json_body["error"]) @@ -320,10 +309,14 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): want_mac.update(nonce.encode("ascii") + b"\x00bob1\x00abc123\x00notadmin") want_mac = want_mac.hexdigest() - body = json.dumps( - {"nonce": nonce, "username": "bob1", "password": "abc123", "mac": want_mac} - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob1", + "password": "abc123", + "mac": want_mac, + } + + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob1:test", channel.json_body["user_id"]) @@ -340,16 +333,14 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): want_mac.update(nonce.encode("ascii") + b"\x00bob2\x00abc123\x00notadmin") want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob2", - "displayname": None, - "password": "abc123", - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob2", + "displayname": None, + "password": "abc123", + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob2:test", channel.json_body["user_id"]) @@ -366,22 +357,20 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): want_mac.update(nonce.encode("ascii") + b"\x00bob3\x00abc123\x00notadmin") want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob3", - "displayname": "", - "password": "abc123", - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob3", + "displayname": "", + "password": "abc123", + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob3:test", channel.json_body["user_id"]) channel = self.make_request("GET", "/profile/@bob3:test/displayname") - self.assertEqual(404, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(404, channel.code, msg=channel.json_body) # set displayname channel = self.make_request("GET", self.url) @@ -391,16 +380,14 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): want_mac.update(nonce.encode("ascii") + b"\x00bob4\x00abc123\x00notadmin") want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob4", - "displayname": "Bob's Name", - "password": "abc123", - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob4", + "displayname": "Bob's Name", + "password": "abc123", + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob4:test", channel.json_body["user_id"]) @@ -440,17 +427,15 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): ) want_mac = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": "bob", - "password": "abc123", - "admin": True, - "user_type": UserTypes.SUPPORT, - "mac": want_mac, - } - ) - channel = self.make_request("POST", self.url, body.encode("utf8")) + body = { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "user_type": UserTypes.SUPPORT, + "mac": want_mac, + } + channel = self.make_request("POST", self.url, body) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["user_id"]) @@ -993,12 +978,11 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): """ If parameter `erase` is not boolean, return an error """ - body = json.dumps({"erase": "False"}) channel = self.make_request( "POST", self.url, - content=body.encode(encoding="utf_8"), + content={"erase": "False"}, access_token=self.admin_user_tok, ) @@ -2201,7 +2185,7 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase): """ channel = self.make_request("GET", self.url, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) def test_requester_is_no_admin(self): @@ -2216,7 +2200,7 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase): access_token=other_user_token, ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) def test_user_does_not_exist(self): @@ -2359,7 +2343,7 @@ class PushersRestTestCase(unittest.HomeserverTestCase): """ channel = self.make_request("GET", self.url, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) def test_requester_is_no_admin(self): @@ -2374,7 +2358,7 @@ class PushersRestTestCase(unittest.HomeserverTestCase): access_token=other_user_token, ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) def test_user_does_not_exist(self): @@ -3073,7 +3057,7 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase): """Try to login as a user without authentication.""" channel = self.make_request("POST", self.url, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) def test_not_admin(self): @@ -3082,7 +3066,7 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase): "POST", self.url, b"{}", access_token=self.other_user_tok ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) def test_send_event(self): """Test that sending event as a user works.""" @@ -3127,7 +3111,7 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase): # The puppet token should no longer work channel = self.make_request("GET", "devices", b"{}", access_token=puppet_token) - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) # .. but the real user's tokens should still work channel = self.make_request( @@ -3160,7 +3144,7 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase): channel = self.make_request( "GET", "devices", b"{}", access_token=self.other_user_tok ) - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) def test_admin_logout_all(self): """Tests that the admin user calling `/logout/all` does expire the @@ -3181,7 +3165,7 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase): # The puppet token should no longer work channel = self.make_request("GET", "devices", b"{}", access_token=puppet_token) - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) # .. but the real user's tokens should still work channel = self.make_request( @@ -3242,6 +3226,13 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase): self.helper.join(room_id, user=self.other_user, tok=puppet_token) +@parameterized_class( + ("url_prefix",), + [ + ("/_synapse/admin/v1/whois/%s",), + ("/_matrix/client/r0/admin/whois/%s",), + ], +) class WhoisRestTestCase(unittest.HomeserverTestCase): servlets = [ @@ -3254,21 +3245,14 @@ class WhoisRestTestCase(unittest.HomeserverTestCase): self.admin_user_tok = self.login("admin", "pass") self.other_user = self.register_user("user", "pass") - self.url1 = "/_synapse/admin/v1/whois/%s" % urllib.parse.quote(self.other_user) - self.url2 = "/_matrix/client/r0/admin/whois/%s" % urllib.parse.quote( - self.other_user - ) + self.url = self.url_prefix % self.other_user def test_no_auth(self): """ Try to get information of an user without authentication. """ - channel = self.make_request("GET", self.url1, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) - - channel = self.make_request("GET", self.url2, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + channel = self.make_request("GET", self.url, b"{}") + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) def test_requester_is_not_admin(self): @@ -3280,38 +3264,21 @@ class WhoisRestTestCase(unittest.HomeserverTestCase): channel = self.make_request( "GET", - self.url1, - access_token=other_user2_token, - ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) - - channel = self.make_request( - "GET", - self.url2, + self.url, access_token=other_user2_token, ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) def test_user_is_not_local(self): """ Tests that a lookup for a user that is not a local returns a 400 """ - url1 = "/_synapse/admin/v1/whois/@unknown_person:unknown_domain" - url2 = "/_matrix/client/r0/admin/whois/@unknown_person:unknown_domain" - - channel = self.make_request( - "GET", - url1, - access_token=self.admin_user_tok, - ) - self.assertEqual(400, channel.code, msg=channel.json_body) - self.assertEqual("Can only whois a local user", channel.json_body["error"]) + url = self.url_prefix % "@unknown_person:unknown_domain" channel = self.make_request( "GET", - url2, + url, access_token=self.admin_user_tok, ) self.assertEqual(400, channel.code, msg=channel.json_body) @@ -3323,16 +3290,7 @@ class WhoisRestTestCase(unittest.HomeserverTestCase): """ channel = self.make_request( "GET", - self.url1, - access_token=self.admin_user_tok, - ) - self.assertEqual(200, channel.code, msg=channel.json_body) - self.assertEqual(self.other_user, channel.json_body["user_id"]) - self.assertIn("devices", channel.json_body) - - channel = self.make_request( - "GET", - self.url2, + self.url, access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) @@ -3347,16 +3305,7 @@ class WhoisRestTestCase(unittest.HomeserverTestCase): channel = self.make_request( "GET", - self.url1, - access_token=other_user_token, - ) - self.assertEqual(200, channel.code, msg=channel.json_body) - self.assertEqual(self.other_user, channel.json_body["user_id"]) - self.assertIn("devices", channel.json_body) - - channel = self.make_request( - "GET", - self.url2, + self.url, access_token=other_user_token, ) self.assertEqual(200, channel.code, msg=channel.json_body) @@ -3388,7 +3337,7 @@ class ShadowBanRestTestCase(unittest.HomeserverTestCase): Try to get information of an user without authentication. """ channel = self.make_request("POST", self.url) - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) def test_requester_is_not_admin(self): @@ -3398,7 +3347,7 @@ class ShadowBanRestTestCase(unittest.HomeserverTestCase): other_user_token = self.login("user", "pass") channel = self.make_request("POST", self.url, access_token=other_user_token) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) def test_user_is_not_local(self): @@ -3447,84 +3396,41 @@ class RateLimitTestCase(unittest.HomeserverTestCase): % urllib.parse.quote(self.other_user) ) - def test_no_auth(self): + @parameterized.expand(["GET", "POST", "DELETE"]) + def test_no_auth(self, method: str): """ Try to get information of a user without authentication. """ - channel = self.make_request("GET", self.url, b"{}") - - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) - - channel = self.make_request("POST", self.url, b"{}") + channel = self.make_request(method, self.url, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) - - channel = self.make_request("DELETE", self.url, b"{}") - - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) - def test_requester_is_no_admin(self): + @parameterized.expand(["GET", "POST", "DELETE"]) + def test_requester_is_no_admin(self, method: str): """ If the user is not a server admin, an error is returned. """ other_user_token = self.login("user", "pass") channel = self.make_request( - "GET", - self.url, - access_token=other_user_token, - ) - - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) - - channel = self.make_request( - "POST", - self.url, - access_token=other_user_token, - ) - - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) - - channel = self.make_request( - "DELETE", + method, self.url, access_token=other_user_token, ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) - def test_user_does_not_exist(self): + @parameterized.expand(["GET", "POST", "DELETE"]) + def test_user_does_not_exist(self, method: str): """ Tests that a lookup for a user that does not exist returns a 404 """ url = "/_synapse/admin/v1/users/@unknown_person:test/override_ratelimit" channel = self.make_request( - "GET", - url, - access_token=self.admin_user_tok, - ) - - self.assertEqual(404, channel.code, msg=channel.json_body) - self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) - - channel = self.make_request( - "POST", - url, - access_token=self.admin_user_tok, - ) - - self.assertEqual(404, channel.code, msg=channel.json_body) - self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) - - channel = self.make_request( - "DELETE", + method, url, access_token=self.admin_user_tok, ) @@ -3532,7 +3438,14 @@ class RateLimitTestCase(unittest.HomeserverTestCase): self.assertEqual(404, channel.code, msg=channel.json_body) self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) - def test_user_is_not_local(self): + @parameterized.expand( + [ + ("GET", "Can only look up local users"), + ("POST", "Only local users can be ratelimited"), + ("DELETE", "Only local users can be ratelimited"), + ] + ) + def test_user_is_not_local(self, method: str, error_msg: str): """ Tests that a lookup for a user that is not a local returns a 400 """ @@ -3541,35 +3454,13 @@ class RateLimitTestCase(unittest.HomeserverTestCase): ) channel = self.make_request( - "GET", - url, - access_token=self.admin_user_tok, - ) - - self.assertEqual(400, channel.code, msg=channel.json_body) - self.assertEqual("Can only look up local users", channel.json_body["error"]) - - channel = self.make_request( - "POST", - url, - access_token=self.admin_user_tok, - ) - - self.assertEqual(400, channel.code, msg=channel.json_body) - self.assertEqual( - "Only local users can be ratelimited", channel.json_body["error"] - ) - - channel = self.make_request( - "DELETE", + method, url, access_token=self.admin_user_tok, ) self.assertEqual(400, channel.code, msg=channel.json_body) - self.assertEqual( - "Only local users can be ratelimited", channel.json_body["error"] - ) + self.assertEqual(error_msg, channel.json_body["error"]) def test_invalid_parameter(self): """ diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 02b5e9a8d0..3c7d49f0b4 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -13,15 +13,15 @@ # limitations under the License. import itertools -import json -import urllib -from typing import Optional +import urllib.parse +from typing import Dict, List, Optional, Tuple from synapse.api.constants import EventTypes, RelationTypes from synapse.rest import admin from synapse.rest.client import login, register, relations, room from tests import unittest +from tests.server import FakeChannel class RelationsTestCase(unittest.HomeserverTestCase): @@ -34,16 +34,16 @@ class RelationsTestCase(unittest.HomeserverTestCase): ] hijack_auth = False - def make_homeserver(self, reactor, clock): + def default_config(self) -> dict: # We need to enable msc1849 support for aggregations - config = self.default_config() + config = super().default_config() config["experimental_msc1849_support_enabled"] = True # We enable frozen dicts as relations/edits change event contents, so we # want to test that we don't modify the events in the caches. config["use_frozen_dicts"] = True - return self.setup_test_homeserver(config=config) + return config def prepare(self, reactor, clock, hs): self.user_id, self.user_token = self._create_user("alice") @@ -146,8 +146,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertEquals(200, channel.code, channel.json_body) expected_event_ids.append(channel.json_body["event_id"]) - prev_token = None - found_event_ids = [] + prev_token: Optional[str] = None + found_event_ids: List[str] = [] for _ in range(20): from_token = "" if prev_token: @@ -203,8 +203,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): idx += 1 idx %= len(access_tokens) - prev_token = None - found_groups = {} + prev_token: Optional[str] = None + found_groups: Dict[str, int] = {} for _ in range(20): from_token = "" if prev_token: @@ -270,8 +270,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") self.assertEquals(200, channel.code, channel.json_body) - prev_token = None - found_event_ids = [] + prev_token: Optional[str] = None + found_event_ids: List[str] = [] encoded_key = urllib.parse.quote_plus("👍".encode()) for _ in range(20): from_token = "" @@ -677,24 +677,23 @@ class RelationsTestCase(unittest.HomeserverTestCase): def _send_relation( self, - relation_type, - event_type, - key=None, + relation_type: str, + event_type: str, + key: Optional[str] = None, content: Optional[dict] = None, - access_token=None, - parent_id=None, - ): + access_token: Optional[str] = None, + parent_id: Optional[str] = None, + ) -> FakeChannel: """Helper function to send a relation pointing at `self.parent_id` Args: - relation_type (str): One of `RelationTypes` - event_type (str): The type of the event to create - parent_id (str): The event_id this relation relates to. If None, then self.parent_id - key (str|None): The aggregation key used for m.annotation relation - type. - content(dict|None): The content of the created event. - access_token (str|None): The access token used to send the relation, - defaults to `self.user_token` + relation_type: One of `RelationTypes` + event_type: The type of the event to create + key: The aggregation key used for m.annotation relation type. + content: The content of the created event. + access_token: The access token used to send the relation, defaults + to `self.user_token` + parent_id: The event_id this relation relates to. If None, then self.parent_id Returns: FakeChannel @@ -712,12 +711,12 @@ class RelationsTestCase(unittest.HomeserverTestCase): "POST", "/_matrix/client/unstable/rooms/%s/send_relation/%s/%s/%s%s" % (self.room, original_id, relation_type, event_type, query), - json.dumps(content or {}).encode("utf-8"), + content or {}, access_token=access_token, ) return channel - def _create_user(self, localpart): + def _create_user(self, localpart: str) -> Tuple[str, str]: user_id = self.register_user(localpart, "abc123") access_token = self.login(localpart, "abc123") diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 38ac9be113..531f09c48b 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -12,25 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. import threading -from typing import Dict +from typing import TYPE_CHECKING, Dict, Optional, Tuple from unittest.mock import Mock from synapse.api.constants import EventTypes +from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.third_party_rules import load_legacy_third_party_event_rules -from synapse.module_api import ModuleApi from synapse.rest import admin from synapse.rest.client import login, room -from synapse.types import Requester, StateMap +from synapse.types import JsonDict, Requester, StateMap from synapse.util.frozenutils import unfreeze from tests import unittest +if TYPE_CHECKING: + from synapse.module_api import ModuleApi + thread_local = threading.local() class LegacyThirdPartyRulesTestModule: - def __init__(self, config: Dict, module_api: ModuleApi): + def __init__(self, config: Dict, module_api: "ModuleApi"): # keep a record of the "current" rules module, so that the test can patch # it if desired. thread_local.rules_module = self @@ -50,7 +53,7 @@ class LegacyThirdPartyRulesTestModule: class LegacyDenyNewRooms(LegacyThirdPartyRulesTestModule): - def __init__(self, config: Dict, module_api: ModuleApi): + def __init__(self, config: Dict, module_api: "ModuleApi"): super().__init__(config, module_api) def on_create_room( @@ -60,7 +63,7 @@ class LegacyDenyNewRooms(LegacyThirdPartyRulesTestModule): class LegacyChangeEvents(LegacyThirdPartyRulesTestModule): - def __init__(self, config: Dict, module_api: ModuleApi): + def __init__(self, config: Dict, module_api: "ModuleApi"): super().__init__(config, module_api) async def check_event_allowed(self, event: EventBase, state: StateMap[EventBase]): @@ -136,6 +139,47 @@ class ThirdPartyRulesTestCase(unittest.HomeserverTestCase): ) self.assertEquals(channel.result["code"], b"403", channel.result) + def test_third_party_rules_workaround_synapse_errors_pass_through(self): + """ + Tests that the workaround introduced by https://github.com/matrix-org/synapse/pull/11042 + is functional: that SynapseErrors are passed through from check_event_allowed + and bubble up to the web resource. + + NEW MODULES SHOULD NOT MAKE USE OF THIS WORKAROUND! + This is a temporary workaround! + """ + + class NastyHackException(SynapseError): + def error_dict(self): + """ + This overrides SynapseError's `error_dict` to nastily inject + JSON into the error response. + """ + result = super().error_dict() + result["nasty"] = "very" + return result + + # add a callback that will raise our hacky exception + async def check(ev, state) -> Tuple[bool, Optional[JsonDict]]: + raise NastyHackException(429, "message") + + self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [check] + + # Make a request + channel = self.make_request( + "PUT", + "/_matrix/client/r0/rooms/%s/send/foo.bar.forbidden/2" % self.room_id, + {}, + access_token=self.tok, + ) + # Check the error code + self.assertEquals(channel.result["code"], b"429", channel.result) + # Check the JSON body has had the `nasty` key injected + self.assertEqual( + channel.json_body, + {"errcode": "M_UNKNOWN", "error": "message", "nasty": "very"}, + ) + def test_cannot_modify_event(self): """cannot accidentally modify an event before it is persisted""" diff --git a/tests/rest/media/v1/test_filepath.py b/tests/rest/media/v1/test_filepath.py new file mode 100644 index 0000000000..09504a485f --- /dev/null +++ b/tests/rest/media/v1/test_filepath.py @@ -0,0 +1,238 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.rest.media.v1.filepath import MediaFilePaths + +from tests import unittest + + +class MediaFilePathsTestCase(unittest.TestCase): + def setUp(self): + super().setUp() + + self.filepaths = MediaFilePaths("/media_store") + + def test_local_media_filepath(self): + """Test local media paths""" + self.assertEqual( + self.filepaths.local_media_filepath_rel("GerZNDnDZVjsOtardLuwfIBg"), + "local_content/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + self.assertEqual( + self.filepaths.local_media_filepath("GerZNDnDZVjsOtardLuwfIBg"), + "/media_store/local_content/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + + def test_local_media_thumbnail(self): + """Test local media thumbnail paths""" + self.assertEqual( + self.filepaths.local_media_thumbnail_rel( + "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale" + ), + "local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale", + ) + self.assertEqual( + self.filepaths.local_media_thumbnail( + "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale" + ), + "/media_store/local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale", + ) + + def test_local_media_thumbnail_dir(self): + """Test local media thumbnail directory paths""" + self.assertEqual( + self.filepaths.local_media_thumbnail_dir("GerZNDnDZVjsOtardLuwfIBg"), + "/media_store/local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + + def test_remote_media_filepath(self): + """Test remote media paths""" + self.assertEqual( + self.filepaths.remote_media_filepath_rel( + "example.com", "GerZNDnDZVjsOtardLuwfIBg" + ), + "remote_content/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + self.assertEqual( + self.filepaths.remote_media_filepath( + "example.com", "GerZNDnDZVjsOtardLuwfIBg" + ), + "/media_store/remote_content/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + + def test_remote_media_thumbnail(self): + """Test remote media thumbnail paths""" + self.assertEqual( + self.filepaths.remote_media_thumbnail_rel( + "example.com", + "GerZNDnDZVjsOtardLuwfIBg", + 800, + 600, + "image/jpeg", + "scale", + ), + "remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale", + ) + self.assertEqual( + self.filepaths.remote_media_thumbnail( + "example.com", + "GerZNDnDZVjsOtardLuwfIBg", + 800, + 600, + "image/jpeg", + "scale", + ), + "/media_store/remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale", + ) + + def test_remote_media_thumbnail_legacy(self): + """Test old-style remote media thumbnail paths""" + self.assertEqual( + self.filepaths.remote_media_thumbnail_rel_legacy( + "example.com", "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg" + ), + "remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg", + ) + + def test_remote_media_thumbnail_dir(self): + """Test remote media thumbnail directory paths""" + self.assertEqual( + self.filepaths.remote_media_thumbnail_dir( + "example.com", "GerZNDnDZVjsOtardLuwfIBg" + ), + "/media_store/remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + + def test_url_cache_filepath(self): + """Test URL cache paths""" + self.assertEqual( + self.filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar"), + "url_cache/2020-01-02/GerZNDnDZVjsOtar", + ) + self.assertEqual( + self.filepaths.url_cache_filepath("2020-01-02_GerZNDnDZVjsOtar"), + "/media_store/url_cache/2020-01-02/GerZNDnDZVjsOtar", + ) + + def test_url_cache_filepath_legacy(self): + """Test old-style URL cache paths""" + self.assertEqual( + self.filepaths.url_cache_filepath_rel("GerZNDnDZVjsOtardLuwfIBg"), + "url_cache/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + self.assertEqual( + self.filepaths.url_cache_filepath("GerZNDnDZVjsOtardLuwfIBg"), + "/media_store/url_cache/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + + def test_url_cache_filepath_dirs_to_delete(self): + """Test URL cache cleanup paths""" + self.assertEqual( + self.filepaths.url_cache_filepath_dirs_to_delete( + "2020-01-02_GerZNDnDZVjsOtar" + ), + ["/media_store/url_cache/2020-01-02"], + ) + + def test_url_cache_filepath_dirs_to_delete_legacy(self): + """Test old-style URL cache cleanup paths""" + self.assertEqual( + self.filepaths.url_cache_filepath_dirs_to_delete( + "GerZNDnDZVjsOtardLuwfIBg" + ), + [ + "/media_store/url_cache/Ge/rZ", + "/media_store/url_cache/Ge", + ], + ) + + def test_url_cache_thumbnail(self): + """Test URL cache thumbnail paths""" + self.assertEqual( + self.filepaths.url_cache_thumbnail_rel( + "2020-01-02_GerZNDnDZVjsOtar", 800, 600, "image/jpeg", "scale" + ), + "url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar/800-600-image-jpeg-scale", + ) + self.assertEqual( + self.filepaths.url_cache_thumbnail( + "2020-01-02_GerZNDnDZVjsOtar", 800, 600, "image/jpeg", "scale" + ), + "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar/800-600-image-jpeg-scale", + ) + + def test_url_cache_thumbnail_legacy(self): + """Test old-style URL cache thumbnail paths""" + self.assertEqual( + self.filepaths.url_cache_thumbnail_rel( + "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale" + ), + "url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale", + ) + self.assertEqual( + self.filepaths.url_cache_thumbnail( + "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale" + ), + "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale", + ) + + def test_url_cache_thumbnail_directory(self): + """Test URL cache thumbnail directory paths""" + self.assertEqual( + self.filepaths.url_cache_thumbnail_directory_rel( + "2020-01-02_GerZNDnDZVjsOtar" + ), + "url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar", + ) + self.assertEqual( + self.filepaths.url_cache_thumbnail_directory("2020-01-02_GerZNDnDZVjsOtar"), + "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar", + ) + + def test_url_cache_thumbnail_directory_legacy(self): + """Test old-style URL cache thumbnail directory paths""" + self.assertEqual( + self.filepaths.url_cache_thumbnail_directory_rel( + "GerZNDnDZVjsOtardLuwfIBg" + ), + "url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + self.assertEqual( + self.filepaths.url_cache_thumbnail_directory("GerZNDnDZVjsOtardLuwfIBg"), + "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg", + ) + + def test_url_cache_thumbnail_dirs_to_delete(self): + """Test URL cache thumbnail cleanup paths""" + self.assertEqual( + self.filepaths.url_cache_thumbnail_dirs_to_delete( + "2020-01-02_GerZNDnDZVjsOtar" + ), + [ + "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar", + "/media_store/url_cache_thumbnails/2020-01-02", + ], + ) + + def test_url_cache_thumbnail_dirs_to_delete_legacy(self): + """Test old-style URL cache thumbnail cleanup paths""" + self.assertEqual( + self.filepaths.url_cache_thumbnail_dirs_to_delete( + "GerZNDnDZVjsOtardLuwfIBg" + ), + [ + "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg", + "/media_store/url_cache_thumbnails/Ge/rZ", + "/media_store/url_cache_thumbnails/Ge", + ], + ) diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py new file mode 100644 index 0000000000..048d0ca44a --- /dev/null +++ b/tests/rest/media/v1/test_oembed.py @@ -0,0 +1,51 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.rest.media.v1.oembed import OEmbedProvider +from synapse.server import HomeServer +from synapse.types import JsonDict +from synapse.util import Clock + +from tests.unittest import HomeserverTestCase + + +class OEmbedTests(HomeserverTestCase): + def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): + self.oembed = OEmbedProvider(homeserver) + + def parse_response(self, response: JsonDict): + return self.oembed.parse_oembed_response( + "https://test", json.dumps(response).encode("utf-8") + ) + + def test_version(self): + """Accept versions that are similar to 1.0 as a string or int (or missing).""" + for version in ("1.0", 1.0, 1): + result = self.parse_response({"version": version, "type": "link"}) + # An empty Open Graph response is an error, ensure the URL is included. + self.assertIn("og:url", result.open_graph_result) + + # A missing version should be treated as 1.0. + result = self.parse_response({"type": "link"}) + self.assertIn("og:url", result.open_graph_result) + + # Invalid versions should be rejected. + for version in ("2.0", "1", 1.1, 0, None, {}, []): + result = self.parse_response({"version": version, "type": "link"}) + # An empty Open Graph response is an error, ensure the URL is included. + self.assertEqual({}, result.open_graph_result) diff --git a/tests/server.py b/tests/server.py index 64645651ce..103351b487 100644 --- a/tests/server.py +++ b/tests/server.py @@ -1,3 +1,17 @@ +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import logging from collections import deque @@ -27,9 +41,10 @@ from twisted.python.failure import Failure from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http_headers import Headers from twisted.web.resource import IResource -from twisted.web.server import Site +from twisted.web.server import Request, Site from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from synapse.util import Clock from tests.utils import setup_test_homeserver as _sth @@ -198,14 +213,14 @@ class FakeSite: def make_request( reactor, site: Union[Site, FakeSite], - method, - path, - content=b"", - access_token=None, - request=SynapseRequest, - shorthand=True, - federation_auth_origin=None, - content_is_form=False, + method: Union[bytes, str], + path: Union[bytes, str], + content: Union[bytes, str, JsonDict] = b"", + access_token: Optional[str] = None, + request: Request = SynapseRequest, + shorthand: bool = True, + federation_auth_origin: Optional[bytes] = None, + content_is_form: bool = False, await_result: bool = True, custom_headers: Optional[ Iterable[Tuple[Union[bytes, str], Union[bytes, str]]] @@ -218,26 +233,23 @@ def make_request( Returns the fake Channel object which records the response to the request. Args: + reactor: site: The twisted Site to use to render the request - - method (bytes/unicode): The HTTP request method ("verb"). - path (bytes/unicode): The HTTP path, suitably URL encoded (e.g. - escaped UTF-8 & spaces and such). - content (bytes or dict): The body of the request. JSON-encoded, if - a dict. + method: The HTTP request method ("verb"). + path: The HTTP path, suitably URL encoded (e.g. escaped UTF-8 & spaces and such). + content: The body of the request. JSON-encoded, if a str of bytes. + access_token: The access token to add as authorization for the request. + request: The request class to create. shorthand: Whether to try and be helpful and prefix the given URL - with the usual REST API path, if it doesn't contain it. - federation_auth_origin (bytes|None): if set to not-None, we will add a fake + with the usual REST API path, if it doesn't contain it. + federation_auth_origin: if set to not-None, we will add a fake Authorization header pretenting to be the given server name. content_is_form: Whether the content is URL encoded form data. Adds the 'Content-Type': 'application/x-www-form-urlencoded' header. - - custom_headers: (name, value) pairs to add as request headers - await_result: whether to wait for the request to complete rendering. If true, will pump the reactor until the the renderer tells the channel the request is finished. - + custom_headers: (name, value) pairs to add as request headers client_ip: The IP to use as the requesting IP. Useful for testing ratelimiting. diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index be3ed64f5e..37cf7bb232 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Set, Tuple +from typing import Any, Dict, Set, Tuple from unittest import mock from unittest.mock import Mock, patch @@ -42,18 +42,7 @@ class GetUserDirectoryTables: def __init__(self, store: DataStore): self.store = store - def _compress_shared( - self, shared: List[Dict[str, str]] - ) -> Set[Tuple[str, str, str]]: - """ - Compress a list of users who share rooms dicts to a list of tuples. - """ - r = set() - for i in shared: - r.add((i["user_id"], i["other_user_id"], i["room_id"])) - return r - - async def get_users_in_public_rooms(self) -> List[Tuple[str, str]]: + async def get_users_in_public_rooms(self) -> Set[Tuple[str, str]]: """Fetch the entire `users_in_public_rooms` table. Returns a list of tuples (user_id, room_id) where room_id is public and @@ -63,24 +52,27 @@ class GetUserDirectoryTables: "users_in_public_rooms", None, ("user_id", "room_id") ) - retval = [] + retval = set() for i in r: - retval.append((i["user_id"], i["room_id"])) + retval.add((i["user_id"], i["room_id"])) return retval - async def get_users_who_share_private_rooms(self) -> List[Dict[str, str]]: + async def get_users_who_share_private_rooms(self) -> Set[Tuple[str, str, str]]: """Fetch the entire `users_who_share_private_rooms` table. - Returns a dict containing "user_id", "other_user_id" and "room_id" keys. - The dicts can be flattened to Tuples with the `_compress_shared` method. - (This seems a little awkward---maybe we could clean this up.) + Returns a set of tuples (user_id, other_user_id, room_id) corresponding + to the rows of `users_who_share_private_rooms`. """ - return await self.store.db_pool.simple_select_list( + rows = await self.store.db_pool.simple_select_list( "users_who_share_private_rooms", None, ["user_id", "other_user_id", "room_id"], ) + rv = set() + for row in rows: + rv.add((row["user_id"], row["other_user_id"], row["room_id"])) + return rv async def get_users_in_user_directory(self) -> Set[str]: """Fetch the set of users in the `user_directory` table. @@ -113,6 +105,16 @@ class GetUserDirectoryTables: for row in rows } + async def get_tables( + self, + ) -> Tuple[Set[str], Set[Tuple[str, str]], Set[Tuple[str, str, str]]]: + """Multiple tests want to inspect these tables, so expose them together.""" + return ( + await self.get_users_in_user_directory(), + await self.get_users_in_public_rooms(), + await self.get_users_who_share_private_rooms(), + ) + class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): """Ensure that rebuilding the directory writes the correct data to the DB. @@ -166,8 +168,8 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): ) # Nothing updated yet - self.assertEqual(shares_private, []) - self.assertEqual(public_users, []) + self.assertEqual(shares_private, set()) + self.assertEqual(public_users, set()) # Ugh, have to reset this flag self.store.db_pool.updates._all_done = False @@ -236,24 +238,15 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): # Do the initial population of the user directory via the background update self._purge_and_rebuild_user_dir() - shares_private = self.get_success( - self.user_dir_helper.get_users_who_share_private_rooms() - ) - public_users = self.get_success( - self.user_dir_helper.get_users_in_public_rooms() + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() ) # User 1 and User 2 are in the same public room - self.assertEqual(set(public_users), {(u1, room), (u2, room)}) - + self.assertEqual(in_public, {(u1, room), (u2, room)}) # User 1 and User 3 share private rooms - self.assertEqual( - self.user_dir_helper._compress_shared(shares_private), - {(u1, u3, private_room), (u3, u1, private_room)}, - ) - + self.assertEqual(in_private, {(u1, u3, private_room), (u3, u1, private_room)}) # All three should have entries in the directory - users = self.get_success(self.user_dir_helper.get_users_in_user_directory()) self.assertEqual(users, {u1, u2, u3}) # The next four tests (test_population_excludes_*) all set up @@ -289,16 +282,12 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): self, normal_user: str, public_room: str, private_room: str ) -> None: # After rebuilding the directory, we should only see the normal user. - users = self.get_success(self.user_dir_helper.get_users_in_user_directory()) - self.assertEqual(users, {normal_user}) - in_public_rooms = self.get_success( - self.user_dir_helper.get_users_in_public_rooms() + users, in_public, in_private = self.get_success( + self.user_dir_helper.get_tables() ) - self.assertEqual(set(in_public_rooms), {(normal_user, public_room)}) - in_private_rooms = self.get_success( - self.user_dir_helper.get_users_who_share_private_rooms() - ) - self.assertEqual(in_private_rooms, []) + self.assertEqual(users, {normal_user}) + self.assertEqual(in_public, {(normal_user, public_room)}) + self.assertEqual(in_private, set()) def test_population_excludes_support_user(self) -> None: # Create a normal and support user. diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index cf407c51cf..e2c506e5a4 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -24,6 +24,47 @@ from synapse.types import JsonDict, get_domain_from_id class EventAuthTestCase(unittest.TestCase): + def test_rejected_auth_events(self): + """ + Events that refer to rejected events in their auth events are rejected + """ + creator = "@creator:example.com" + auth_events = [ + _create_event(creator), + _join_event(creator), + ] + + # creator should be able to send state + event_auth.check_auth_rules_for_event( + RoomVersions.V9, + _random_state_event(creator), + auth_events, + ) + + # ... but a rejected join_rules event should cause it to be rejected + rejected_join_rules = _join_rules_event(creator, "public") + rejected_join_rules.rejected_reason = "stinky" + auth_events.append(rejected_join_rules) + + self.assertRaises( + AuthError, + event_auth.check_auth_rules_for_event, + RoomVersions.V9, + _random_state_event(creator), + auth_events, + ) + + # ... even if there is *also* a good join rules + auth_events.append(_join_rules_event(creator, "public")) + + self.assertRaises( + AuthError, + event_auth.check_auth_rules_for_event, + RoomVersions.V9, + _random_state_event(creator), + auth_events, + ) + def test_random_users_cannot_send_state_before_first_pl(self): """ Check that, before the first PL lands, the creator is the only user @@ -31,11 +72,11 @@ class EventAuthTestCase(unittest.TestCase): """ creator = "@creator:example.com" joiner = "@joiner:example.com" - auth_events = { - ("m.room.create", ""): _create_event(creator), - ("m.room.member", creator): _join_event(creator), - ("m.room.member", joiner): _join_event(joiner), - } + auth_events = [ + _create_event(creator), + _join_event(creator), + _join_event(joiner), + ] # creator should be able to send state event_auth.check_auth_rules_for_event( @@ -62,15 +103,15 @@ class EventAuthTestCase(unittest.TestCase): pleb = "@joiner:example.com" king = "@joiner2:example.com" - auth_events = { - ("m.room.create", ""): _create_event(creator), - ("m.room.member", creator): _join_event(creator), - ("m.room.power_levels", ""): _power_levels_event( + auth_events = [ + _create_event(creator), + _join_event(creator), + _power_levels_event( creator, {"state_default": "30", "users": {pleb: "29", king: "30"}} ), - ("m.room.member", pleb): _join_event(pleb), - ("m.room.member", king): _join_event(king), - } + _join_event(pleb), + _join_event(king), + ] # pleb should not be able to send state self.assertRaises( @@ -92,10 +133,10 @@ class EventAuthTestCase(unittest.TestCase): """Alias events have special behavior up through room version 6.""" creator = "@creator:example.com" other = "@other:example.com" - auth_events = { - ("m.room.create", ""): _create_event(creator), - ("m.room.member", creator): _join_event(creator), - } + auth_events = [ + _create_event(creator), + _join_event(creator), + ] # creator should be able to send aliases event_auth.check_auth_rules_for_event( @@ -131,10 +172,10 @@ class EventAuthTestCase(unittest.TestCase): """After MSC2432, alias events have no special behavior.""" creator = "@creator:example.com" other = "@other:example.com" - auth_events = { - ("m.room.create", ""): _create_event(creator), - ("m.room.member", creator): _join_event(creator), - } + auth_events = [ + _create_event(creator), + _join_event(creator), + ] # creator should be able to send aliases event_auth.check_auth_rules_for_event( @@ -170,14 +211,14 @@ class EventAuthTestCase(unittest.TestCase): creator = "@creator:example.com" pleb = "@joiner:example.com" - auth_events = { - ("m.room.create", ""): _create_event(creator), - ("m.room.member", creator): _join_event(creator), - ("m.room.power_levels", ""): _power_levels_event( + auth_events = [ + _create_event(creator), + _join_event(creator), + _power_levels_event( creator, {"state_default": "30", "users": {pleb: "30"}} ), - ("m.room.member", pleb): _join_event(pleb), - } + _join_event(pleb), + ] # pleb should be able to modify the notifications power level. event_auth.check_auth_rules_for_event( @@ -211,7 +252,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user cannot be force-joined to a room. @@ -219,7 +260,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _member_event(pleb, "join", sender=creator), - auth_events, + auth_events.values(), ) # Banned should be rejected. @@ -228,7 +269,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user who left can re-join. @@ -236,7 +277,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user can send a join if they're in the room. @@ -244,7 +285,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user can accept an invite. @@ -254,7 +295,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) def test_join_rules_invite(self): @@ -275,7 +316,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user cannot be force-joined to a room. @@ -283,7 +324,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _member_event(pleb, "join", sender=creator), - auth_events, + auth_events.values(), ) # Banned should be rejected. @@ -292,7 +333,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user who left cannot re-join. @@ -301,7 +342,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user can send a join if they're in the room. @@ -309,7 +350,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user can accept an invite. @@ -319,7 +360,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) def test_join_rules_msc3083_restricted(self): @@ -347,7 +388,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V6, _join_event(pleb), - auth_events, + auth_events.values(), ) # A properly formatted join event should work. @@ -360,7 +401,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V8, authorised_join_event, - auth_events, + auth_events.values(), ) # A join issued by a specific user works (i.e. the power level checks @@ -380,7 +421,7 @@ class EventAuthTestCase(unittest.TestCase): EventContentFields.AUTHORISING_USER: "@inviter:foo.test" }, ), - pl_auth_events, + pl_auth_events.values(), ) # A join which is missing an authorised server is rejected. @@ -388,7 +429,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V8, _join_event(pleb), - auth_events, + auth_events.values(), ) # An join authorised by a user who is not in the room is rejected. @@ -405,7 +446,7 @@ class EventAuthTestCase(unittest.TestCase): EventContentFields.AUTHORISING_USER: "@other:example.com" }, ), - auth_events, + auth_events.values(), ) # A user cannot be force-joined to a room. (This uses an event which @@ -421,7 +462,7 @@ class EventAuthTestCase(unittest.TestCase): EventContentFields.AUTHORISING_USER: "@inviter:foo.test" }, ), - auth_events, + auth_events.values(), ) # Banned should be rejected. @@ -430,7 +471,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V8, authorised_join_event, - auth_events, + auth_events.values(), ) # A user who left can re-join. @@ -438,7 +479,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V8, authorised_join_event, - auth_events, + auth_events.values(), ) # A user can send a join if they're in the room. (This doesn't need to @@ -447,7 +488,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V8, _join_event(pleb), - auth_events, + auth_events.values(), ) # A user can accept an invite. (This doesn't need to be authorised since @@ -458,7 +499,7 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_auth_rules_for_event( RoomVersions.V8, _join_event(pleb), - auth_events, + auth_events.values(), ) @@ -473,6 +514,7 @@ def _create_event(user_id: str) -> EventBase: "room_id": TEST_ROOM_ID, "event_id": _get_event_id(), "type": "m.room.create", + "state_key": "", "sender": user_id, "content": {"creator": user_id}, } diff --git a/tests/test_preview.py b/tests/test_preview.py index 09e017b4d9..9a576f9a4e 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -15,7 +15,7 @@ from synapse.rest.media.v1.preview_url_resource import ( _calc_og, decode_body, - get_html_media_encoding, + get_html_media_encodings, summarize_paragraphs, ) @@ -159,7 +159,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -175,7 +175,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -194,7 +194,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual( @@ -216,7 +216,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -230,7 +230,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": None, "og:description": "Some text."}) @@ -245,7 +245,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."}) @@ -260,7 +260,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": None, "og:description": "Some text."}) @@ -268,13 +268,13 @@ class CalcOgTestCase(unittest.TestCase): def test_empty(self): """Test a body with no data in it.""" html = b"" - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") self.assertIsNone(tree) def test_no_tree(self): """A valid body with no tree in it.""" html = b"\x00" - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") self.assertIsNone(tree) def test_invalid_encoding(self): @@ -287,7 +287,7 @@ class CalcOgTestCase(unittest.TestCase): </body> </html> """ - tree = decode_body(html, "invalid-encoding") + tree = decode_body(html, "http://example.com/test.html", "invalid-encoding") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -302,15 +302,29 @@ class CalcOgTestCase(unittest.TestCase): </body> </html> """ - tree = decode_body(html) + tree = decode_body(html, "http://example.com/test.html") og = _calc_og(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."}) + def test_windows_1252(self): + """A body which uses cp1252, but doesn't declare that.""" + html = b""" + <html> + <head><title>\xf3</title></head> + <body> + Some text. + </body> + </html> + """ + tree = decode_body(html, "http://example.com/test.html") + og = _calc_og(tree, "http://example.com/test.html") + self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."}) + class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset(self): """A character encoding is found via the meta tag.""" - encoding = get_html_media_encoding( + encodings = get_html_media_encodings( b""" <html> <head><meta charset="ascii"> @@ -319,10 +333,10 @@ class MediaEncodingTestCase(unittest.TestCase): """, "text/html", ) - self.assertEqual(encoding, "ascii") + self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) # A less well-formed version. - encoding = get_html_media_encoding( + encodings = get_html_media_encodings( b""" <html> <head>< meta charset = ascii> @@ -331,11 +345,11 @@ class MediaEncodingTestCase(unittest.TestCase): """, "text/html", ) - self.assertEqual(encoding, "ascii") + self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) def test_meta_charset_underscores(self): """A character encoding contains underscore.""" - encoding = get_html_media_encoding( + encodings = get_html_media_encodings( b""" <html> <head><meta charset="Shift_JIS"> @@ -344,11 +358,11 @@ class MediaEncodingTestCase(unittest.TestCase): """, "text/html", ) - self.assertEqual(encoding, "Shift_JIS") + self.assertEqual(list(encodings), ["shift_jis", "utf-8", "cp1252"]) def test_xml_encoding(self): """A character encoding is found via the meta tag.""" - encoding = get_html_media_encoding( + encodings = get_html_media_encodings( b""" <?xml version="1.0" encoding="ascii"?> <html> @@ -356,11 +370,11 @@ class MediaEncodingTestCase(unittest.TestCase): """, "text/html", ) - self.assertEqual(encoding, "ascii") + self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) def test_meta_xml_encoding(self): """Meta tags take precedence over XML encoding.""" - encoding = get_html_media_encoding( + encodings = get_html_media_encodings( b""" <?xml version="1.0" encoding="ascii"?> <html> @@ -370,7 +384,7 @@ class MediaEncodingTestCase(unittest.TestCase): """, "text/html", ) - self.assertEqual(encoding, "UTF-16") + self.assertEqual(list(encodings), ["utf-16", "ascii", "utf-8", "cp1252"]) def test_content_type(self): """A character encoding is found via the Content-Type header.""" @@ -384,10 +398,37 @@ class MediaEncodingTestCase(unittest.TestCase): 'text/html; charset=ascii";', ) for header in headers: - encoding = get_html_media_encoding(b"", header) - self.assertEqual(encoding, "ascii") + encodings = get_html_media_encodings(b"", header) + self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) def test_fallback(self): """A character encoding cannot be found in the body or header.""" - encoding = get_html_media_encoding(b"", "text/html") - self.assertEqual(encoding, "utf-8") + encodings = get_html_media_encodings(b"", "text/html") + self.assertEqual(list(encodings), ["utf-8", "cp1252"]) + + def test_duplicates(self): + """Ensure each encoding is only attempted once.""" + encodings = get_html_media_encodings( + b""" + <?xml version="1.0" encoding="utf8"?> + <html> + <head><meta charset="UTF-8"> + </head> + </html> + """, + 'text/html; charset="UTF_8"', + ) + self.assertEqual(list(encodings), ["utf-8", "cp1252"]) + + def test_unknown_invalid(self): + """A character encoding should be ignored if it is unknown or invalid.""" + encodings = get_html_media_encodings( + b""" + <html> + <head><meta charset="invalid"> + </head> + </html> + """, + 'text/html; charset="invalid"', + ) + self.assertEqual(list(encodings), ["utf-8", "cp1252"]) diff --git a/tests/unittest.py b/tests/unittest.py index 81c1a9e9d2..a9b60b7eeb 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -46,7 +46,7 @@ from synapse.logging.context import ( set_current_context, ) from synapse.server import HomeServer -from synapse.types import UserID, create_requester +from synapse.types import JsonDict, UserID, create_requester from synapse.util import Clock from synapse.util.httpresourcetree import create_resource_tree from synapse.util.ratelimitutils import FederationRateLimiter @@ -401,7 +401,7 @@ class HomeserverTestCase(TestCase): self, method: Union[bytes, str], path: Union[bytes, str], - content: Union[bytes, dict] = b"", + content: Union[bytes, str, JsonDict] = b"", access_token: Optional[str] = None, request: Type[T] = SynapseRequest, shorthand: bool = True, |