183 files changed, 4875 insertions, 1599 deletions
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index af7ed21fce..3276d1e122 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -5,7 +5,7 @@ name: Build docker images on: push: tags: ["v*"] - branches: [ master, main ] + branches: [ master, main, develop ] workflow_dispatch: permissions: @@ -38,6 +38,9 @@ jobs: id: set-tag run: | case "${GITHUB_REF}" in + refs/heads/develop) + tag=develop + ;; refs/heads/master|refs/heads/main) tag=latest ;; diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8d7e8cafd9..21c9ee7823 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -374,7 +374,7 @@ jobs: working-directory: complement/dockerfiles # Run Complement - - run: go test -v -tags synapse_blacklist,msc2403,msc2946,msc3083 ./tests/... + - run: go test -v -tags synapse_blacklist,msc2403 ./tests/... env: COMPLEMENT_BASE_IMAGE: complement-synapse:latest working-directory: complement diff --git a/CHANGES.md b/CHANGES.md index b75d938e0e..5f625dad1d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,42 @@ +Synapse 1.47.1 (2021-11-23) +=========================== + +This release fixes a security issue in the media store, affecting all prior releases of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of these vulnerabilities being exploited in the wild. + +Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below. + +Security advisory +----------------- + +The following issue is fixed in 1.47.1. + +- **[GHSA-3hfw-x7gx-437c](https://github.com/matrix-org/synapse/security/advisories/GHSA-3hfw-x7gx-437c) / [CVE-2021-41281](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41281): Path traversal when downloading remote media.** + + Synapse instances with the media repository enabled can be tricked into downloading a file from a remote server into an arbitrary directory, potentially outside the media store directory. + + The last two directories and file name of the path are chosen randomly by Synapse and cannot be controlled by an attacker, which limits the impact. + + Homeservers with the media repository disabled are unaffected. Homeservers configured with a federation whitelist are also unaffected. + + Fixed by [91f2bd090](https://github.com/matrix-org/synapse/commit/91f2bd090). + + +Synapse 1.47.0 (2021-11-17) +=========================== + +No significant changes since 1.47.0rc3. + + +Synapse 1.47.0rc3 (2021-11-16) +============================== + +Bugfixes +-------- + +- Fix a bug introduced in 1.47.0rc1 which caused worker processes to not halt startup in the presence of outstanding database migrations. ([\#11346](https://github.com/matrix-org/synapse/issues/11346)) +- Fix a bug introduced in 1.47.0rc1 which prevented the 'remove deleted devices from `device_inbox` column' background process from running when updating from a recent Synapse version. ([\#11303](https://github.com/matrix-org/synapse/issues/11303), [\#11353](https://github.com/matrix-org/synapse/issues/11353)) + + Synapse 1.47.0rc2 (2021-11-10) ============================== diff --git a/changelog.d/10847.misc b/changelog.d/10847.misc new file mode 100644 index 0000000000..7933a38dca --- /dev/null +++ b/changelog.d/10847.misc @@ -0,0 +1 @@ +Add type annotations to `synapse.metrics`. 
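The advisory above (GHSA-3hfw-x7gx-437c / CVE-2021-41281) concerns remote media being written to a path outside the configured media store. As general background rather than the actual fix in 91f2bd090, here is a minimal sketch of the kind of containment check that defends against this class of path traversal; the function name and layout are illustrative, not Synapse's code:

```python
import os

def safe_media_path(media_store_root: str, *parts: str) -> str:
    """Join path components under the media store and refuse any result
    that resolves outside the store directory (e.g. via '..' segments)."""
    root = os.path.abspath(media_store_root)
    candidate = os.path.abspath(os.path.join(root, *parts))
    # commonpath() also raises if the two paths cannot be compared at all
    # (e.g. different drives), which equally counts as "outside the store".
    if os.path.commonpath([root, candidate]) != root:
        raise ValueError(f"refusing path outside the media store: {candidate!r}")
    return candidate

# safe_media_path("/data/media_store", "remote_content", "example.org", "aa", "bb", "cc")
# is returned unchanged, while
# safe_media_path("/data/media_store", "remote_content", "../../etc", "passwd") raises.
```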
diff --git a/changelog.d/11029.misc b/changelog.d/11029.misc new file mode 100644 index 0000000000..111de5fc7a --- /dev/null +++ b/changelog.d/11029.misc @@ -0,0 +1 @@ +Improve type annotations in `synapse.module_api`. \ No newline at end of file diff --git a/changelog.d/11161.feature b/changelog.d/11161.feature new file mode 100644 index 0000000000..76b0d28084 --- /dev/null +++ b/changelog.d/11161.feature @@ -0,0 +1 @@ +Experimental support for the thread relation defined in [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440). diff --git a/changelog.d/11220.bugfix b/changelog.d/11220.bugfix new file mode 100644 index 0000000000..8baae28d5b --- /dev/null +++ b/changelog.d/11220.bugfix @@ -0,0 +1 @@ +Fix using MSC2716 batch sending in combination with event persistence workers. Contributed by @tulir at Beeper. diff --git a/changelog.d/11265.bugfix b/changelog.d/11265.bugfix new file mode 100644 index 0000000000..b0e9dfac53 --- /dev/null +++ b/changelog.d/11265.bugfix @@ -0,0 +1 @@ +Prevent [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical state events from being pushed to an application service via `/transactions`. diff --git a/changelog.d/11306.feature b/changelog.d/11306.feature new file mode 100644 index 0000000000..aba3292015 --- /dev/null +++ b/changelog.d/11306.feature @@ -0,0 +1 @@ +Add plugin support for controlling database background updates. diff --git a/changelog.d/11318.feature b/changelog.d/11318.feature new file mode 100644 index 0000000000..ce28fc1eef --- /dev/null +++ b/changelog.d/11318.feature @@ -0,0 +1 @@ +Add support for the `/_matrix/client/v3` APIs from Matrix v1.1. \ No newline at end of file diff --git a/changelog.d/11324.feature b/changelog.d/11324.feature new file mode 100644 index 0000000000..55494358bb --- /dev/null +++ b/changelog.d/11324.feature @@ -0,0 +1 @@ +Add dedicated admin API for blocking a room. \ No newline at end of file diff --git a/changelog.d/11328.misc b/changelog.d/11328.misc new file mode 100644 index 0000000000..7c377813e8 --- /dev/null +++ b/changelog.d/11328.misc @@ -0,0 +1 @@ +Add type hints to `synapse.util`. diff --git a/changelog.d/11329.feature b/changelog.d/11329.feature new file mode 100644 index 0000000000..7e0efb3b00 --- /dev/null +++ b/changelog.d/11329.feature @@ -0,0 +1 @@ +Support the stable API endpoints for [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946): the room `/hierarchy` endpoint. diff --git a/changelog.d/11333.misc b/changelog.d/11333.misc new file mode 100644 index 0000000000..6c1fd560ad --- /dev/null +++ b/changelog.d/11333.misc @@ -0,0 +1 @@ +Remove deprecated `trust_identity_server_for_password_resets` configuration flag. \ No newline at end of file diff --git a/changelog.d/11341.misc b/changelog.d/11341.misc new file mode 100644 index 0000000000..3d8da755ee --- /dev/null +++ b/changelog.d/11341.misc @@ -0,0 +1 @@ +Add type annotations for some methods and properties in the module API. diff --git a/changelog.d/11352.feature b/changelog.d/11352.feature new file mode 100644 index 0000000000..a4d01b3549 --- /dev/null +++ b/changelog.d/11352.feature @@ -0,0 +1 @@ +Add admin API to run background jobs. \ No newline at end of file diff --git a/changelog.d/11355.bugfix b/changelog.d/11355.bugfix new file mode 100644 index 0000000000..91639f14b2 --- /dev/null +++ b/changelog.d/11355.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.41.0 where space hierarchy responses would be incorrectly reused if multiple users were to make the same request at the same time. 
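Among the entries above, [#11355] fixes space-hierarchy responses being reused across different users who made the same request at the same time. As an illustration of the underlying pitfall, not of Synapse's actual `ResponseCache`, here is a sketch of deduplicating in-flight requests while keeping the requester in the cache key; all names are invented:

```python
import asyncio
from typing import Awaitable, Callable, Dict, Tuple

class PerRequesterCache:
    """Share one computation between identical concurrent requests, keyed
    on (requester, room_id) so one user's response never serves another."""

    def __init__(self) -> None:
        self._in_flight: Dict[Tuple[str, str], "asyncio.Task[dict]"] = {}

    async def get(
        self,
        requester: str,
        room_id: str,
        compute: Callable[[], Awaitable[dict]],
    ) -> dict:
        # Dropping `requester` from this key is the class of bug fixed in
        # #11355: two users asking concurrently would share a response that
        # had been permission-filtered for only one of them.
        key = (requester, room_id)
        task = self._in_flight.get(key)
        if task is None:
            task = asyncio.ensure_future(compute())
            self._in_flight[key] = task
            task.add_done_callback(lambda _t: self._in_flight.pop(key, None))
        return await task
```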
diff --git a/changelog.d/11359.misc b/changelog.d/11359.misc new file mode 100644 index 0000000000..4720519cbc --- /dev/null +++ b/changelog.d/11359.misc @@ -0,0 +1 @@ +Require all files in synapse/ and tests/ to pass mypy unless specifically excluded. diff --git a/changelog.d/11361.feature b/changelog.d/11361.feature new file mode 100644 index 0000000000..24c9244887 --- /dev/null +++ b/changelog.d/11361.feature @@ -0,0 +1 @@ +Update the JWT login type to support custom a `sub` claim. diff --git a/changelog.d/11368.misc b/changelog.d/11368.misc new file mode 100644 index 0000000000..d623935865 --- /dev/null +++ b/changelog.d/11368.misc @@ -0,0 +1 @@ +Fix running `scripts-dev/complement.sh`, which was broken in v1.47.0rc1. diff --git a/changelog.d/11369.misc b/changelog.d/11369.misc new file mode 100644 index 0000000000..3c1dad544b --- /dev/null +++ b/changelog.d/11369.misc @@ -0,0 +1 @@ +Rename `get_access_token_for_user_id` to `create_access_token_for_user_id` to better reflect what it does. diff --git a/changelog.d/11370.misc b/changelog.d/11370.misc new file mode 100644 index 0000000000..13d9f36bf3 --- /dev/null +++ b/changelog.d/11370.misc @@ -0,0 +1 @@ +Rename `get_refresh_token_for_user_id` to `create_refresh_token_for_user_id` to better describe what it does. diff --git a/changelog.d/11371.feature b/changelog.d/11371.feature new file mode 100644 index 0000000000..8e9ca2d633 --- /dev/null +++ b/changelog.d/11371.feature @@ -0,0 +1 @@ +Add support for the `/_matrix/media/v3` APIs from Matrix v1.1. \ No newline at end of file diff --git a/changelog.d/11376.bugfix b/changelog.d/11376.bugfix new file mode 100644 index 0000000000..639e48b59b --- /dev/null +++ b/changelog.d/11376.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where all requests that read events from the database could get stuck as a result of losing the database connection, for real this time. Also fix a race condition introduced in the previous insufficient fix in 1.47.0. diff --git a/changelog.d/11377.bugfix b/changelog.d/11377.bugfix new file mode 100644 index 0000000000..9831fb7bbe --- /dev/null +++ b/changelog.d/11377.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.45.0 where the `read_templates` method of the module API would error. diff --git a/changelog.d/11377.misc b/changelog.d/11377.misc new file mode 100644 index 0000000000..3dac625576 --- /dev/null +++ b/changelog.d/11377.misc @@ -0,0 +1 @@ +Add type hints to configuration classes. diff --git a/changelog.d/11379.bugfix b/changelog.d/11379.bugfix new file mode 100644 index 0000000000..a49d4eb776 --- /dev/null +++ b/changelog.d/11379.bugfix @@ -0,0 +1 @@ +Fix an issue introduced in v1.47.0 which prevented servers re-joining rooms they had previously left, if their signing keys were replaced. diff --git a/changelog.d/11380.misc b/changelog.d/11380.misc new file mode 100644 index 0000000000..703e80d14f --- /dev/null +++ b/changelog.d/11380.misc @@ -0,0 +1 @@ +Publish a `develop` image to dockerhub. diff --git a/changelog.d/11381.doc b/changelog.d/11381.doc new file mode 100644 index 0000000000..f99a432418 --- /dev/null +++ b/changelog.d/11381.doc @@ -0,0 +1 @@ +Fix missing quotes for wildcard domains in `federation_certificate_verification_whitelist`. diff --git a/changelog.d/11382.misc b/changelog.d/11382.misc new file mode 100644 index 0000000000..d812ef309e --- /dev/null +++ b/changelog.d/11382.misc @@ -0,0 +1 @@ +Keep fallback key marked as used if it's re-uploaded. 
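[#11361] above makes the claim used for the user ID localpart configurable instead of always being `sub`; the matching `subject_claim` option appears in the `docs/jwt.md` and `sample_config.yaml` changes further down. A hedged sketch of that validation step using PyJWT, with invented function and parameter names:

```python
from typing import List

import jwt  # PyJWT

def localpart_from_login_token(
    token: str,
    secret: str,
    algorithms: List[str],
    subject_claim: str = "sub",  # configurable claim, defaulting to `sub`
) -> str:
    """Validate a JWT login token and return the claim used as the
    localpart of the Matrix user ID."""
    # PyJWT verifies the signature plus exp/nbf/iat when present.
    claims = jwt.decode(token, secret, algorithms=algorithms)
    localpart = claims.get(subject_claim)
    if not localpart:
        raise ValueError(f"login token is missing the {subject_claim!r} claim")
    return localpart

# localpart_from_login_token(token, "sekrit", ["HS256"], subject_claim="preferred_username")
```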
diff --git a/changelog.d/11386.misc b/changelog.d/11386.misc new file mode 100644 index 0000000000..3178d53f05 --- /dev/null +++ b/changelog.d/11386.misc @@ -0,0 +1 @@ +Use `auto_attribs` on the `attrs` class `RefreshTokenLookupResult`. diff --git a/changelog.d/11388.misc b/changelog.d/11388.misc new file mode 100644 index 0000000000..7ce7ad0498 --- /dev/null +++ b/changelog.d/11388.misc @@ -0,0 +1 @@ +Rename unstable `access_token_lifetime` configuration option to `refreshable_access_token_lifetime` to make it clear it only concerns refreshable access tokens. \ No newline at end of file diff --git a/changelog.d/11389.misc b/changelog.d/11389.misc new file mode 100644 index 0000000000..33672bb075 --- /dev/null +++ b/changelog.d/11389.misc @@ -0,0 +1 @@ +Do not run the broken MSC2716 tests when running `scripts-dev/complement.sh`. diff --git a/changelog.d/11391.feature b/changelog.d/11391.feature new file mode 100644 index 0000000000..4f696285a7 --- /dev/null +++ b/changelog.d/11391.feature @@ -0,0 +1 @@ +Store and allow querying of arbitrary event relations. diff --git a/changelog.d/11392.bugfix b/changelog.d/11392.bugfix new file mode 100644 index 0000000000..fb15800327 --- /dev/null +++ b/changelog.d/11392.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.13.0 where creating and publishing a room could cause errors if `room_list_publication_rules` is configured. diff --git a/changelog.d/11393.misc b/changelog.d/11393.misc new file mode 100644 index 0000000000..6bf47ec2e0 --- /dev/null +++ b/changelog.d/11393.misc @@ -0,0 +1 @@ +Remove dead code from supporting ACME. diff --git a/changelog.d/11395.removal b/changelog.d/11395.removal new file mode 100644 index 0000000000..6c1fd560ad --- /dev/null +++ b/changelog.d/11395.removal @@ -0,0 +1 @@ +Remove deprecated `trust_identity_server_for_password_resets` configuration flag. \ No newline at end of file diff --git a/changelog.d/11408.misc b/changelog.d/11408.misc new file mode 100644 index 0000000000..55ed064672 --- /dev/null +++ b/changelog.d/11408.misc @@ -0,0 +1 @@ +Refactor including the bundled relations when serializing an event. diff --git a/changelog.d/11411.misc b/changelog.d/11411.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11411.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/changelog.d/11413.bugfix b/changelog.d/11413.bugfix new file mode 100644 index 0000000000..44111d8152 --- /dev/null +++ b/changelog.d/11413.bugfix @@ -0,0 +1 @@ +The `/send_join` response now includes the stable `event` field instead of the unstable field from [MSC3083](https://github.com/matrix-org/matrix-doc/pull/3083). diff --git a/changelog.d/11415.doc b/changelog.d/11415.doc new file mode 100644 index 0000000000..e405531867 --- /dev/null +++ b/changelog.d/11415.doc @@ -0,0 +1 @@ +Update the media repository documentation. diff --git a/changelog.d/11421.bugfix b/changelog.d/11421.bugfix new file mode 100644 index 0000000000..28ac65ea7c --- /dev/null +++ b/changelog.d/11421.bugfix @@ -0,0 +1 @@ +Improve performance of various background database schema updates. diff --git a/changelog.d/11422.bugfix b/changelog.d/11422.bugfix new file mode 100644 index 0000000000..28ac65ea7c --- /dev/null +++ b/changelog.d/11422.bugfix @@ -0,0 +1 @@ +Improve performance of various background database schema updates. 
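[#11386] above switches the `attrs` class `RefreshTokenLookupResult` to `auto_attribs`. For readers unfamiliar with that idiom, the two styles compare as follows; the class and field names below are invented for illustration and are not the real class's attributes:

```python
import attr

# Old style: each field declared with attr.ib() and a separate type.
@attr.s(frozen=True, slots=True)
class LookupResultOld:
    user_id = attr.ib(type=str)
    device_id = attr.ib(type=str)
    expiry_ts = attr.ib(type=int, default=0)

# auto_attribs style: ordinary annotations define the fields, so the
# class body doubles as its type information.
@attr.s(frozen=True, slots=True, auto_attribs=True)
class LookupResult:
    user_id: str
    device_id: str
    expiry_ts: int = 0
```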
diff --git a/changelog.d/11425.feature b/changelog.d/11425.feature new file mode 100644 index 0000000000..806dd5d91c --- /dev/null +++ b/changelog.d/11425.feature @@ -0,0 +1 @@ +Support expiry of refresh tokens and expiry of the overall session when refresh tokens are in use. \ No newline at end of file diff --git a/changelog.d/11428.misc b/changelog.d/11428.misc new file mode 100644 index 0000000000..2f814fa5fb --- /dev/null +++ b/changelog.d/11428.misc @@ -0,0 +1 @@ +Add type annotations to some of the configuration surrounding refresh tokens. \ No newline at end of file diff --git a/changelog.d/11429.docker b/changelog.d/11429.docker new file mode 100644 index 0000000000..81db719ed6 --- /dev/null +++ b/changelog.d/11429.docker @@ -0,0 +1 @@ +Update `Dockerfile-workers` to healthcheck all workers in container. diff --git a/changelog.d/11430.misc b/changelog.d/11430.misc new file mode 100644 index 0000000000..28f06f4c4e --- /dev/null +++ b/changelog.d/11430.misc @@ -0,0 +1 @@ +Update [MSC2918 refresh token](https://github.com/matrix-org/matrix-doc/blob/main/proposals/2918-refreshtokens.md#msc2918-refresh-tokens) support to confirm with the latest revision: accept the `refresh_tokens` parameter in the request body rather than in the URL parameters. \ No newline at end of file diff --git a/changelog.d/11439.bugfix b/changelog.d/11439.bugfix new file mode 100644 index 0000000000..fc6bc82b36 --- /dev/null +++ b/changelog.d/11439.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.47.0 where `send_join` could fail due to an outdated `ijson` version. diff --git a/changelog.d/11440.bugfix b/changelog.d/11440.bugfix new file mode 100644 index 0000000000..02ce2e428f --- /dev/null +++ b/changelog.d/11440.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.36 which could cause problems fetching event-signing keys from trusted key servers. diff --git a/changelog.d/11441.bugfix b/changelog.d/11441.bugfix new file mode 100644 index 0000000000..1baef41d70 --- /dev/null +++ b/changelog.d/11441.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.47.0 where `send_join` could fail due to an outdated `ijson` version. \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index b3ebfb84c7..35c9063388 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,21 @@ +matrix-synapse-py3 (1.47.1) stable; urgency=medium + + * New synapse release 1.47.1. + + -- Synapse Packaging team <packages@matrix.org> Fri, 19 Nov 2021 13:44:32 +0000 + +matrix-synapse-py3 (1.47.0) stable; urgency=medium + + * New synapse release 1.47.0. + + -- Synapse Packaging team <packages@matrix.org> Wed, 17 Nov 2021 13:09:43 +0000 + +matrix-synapse-py3 (1.47.0~rc3) stable; urgency=medium + + * New synapse release 1.47.0~rc3. + + -- Synapse Packaging team <packages@matrix.org> Tue, 16 Nov 2021 14:32:47 +0000 + matrix-synapse-py3 (1.47.0~rc2) stable; urgency=medium [ Dan Callahan ] diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers index 969cf97286..46f2e17382 100644 --- a/docker/Dockerfile-workers +++ b/docker/Dockerfile-workers @@ -21,3 +21,6 @@ VOLUME ["/data"] # files to run the desired worker configuration. Will start supervisord. 
COPY ./docker/configure_workers_and_start.py /configure_workers_and_start.py ENTRYPOINT ["/configure_workers_and_start.py"] + +HEALTHCHECK --start-period=5s --interval=15s --timeout=5s \ + CMD /bin/sh /healthcheck.sh diff --git a/docker/conf-workers/healthcheck.sh.j2 b/docker/conf-workers/healthcheck.sh.j2 new file mode 100644 index 0000000000..79c621f89c --- /dev/null +++ b/docker/conf-workers/healthcheck.sh.j2 @@ -0,0 +1,6 @@ +#!/bin/sh +# This healthcheck script is designed to return OK when every +# host involved returns OK +{%- for healthcheck_url in healthcheck_urls %} +curl -fSs {{ healthcheck_url }} || exit 1 +{%- endfor %} diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml index 3cba594d02..f10f78a48c 100644 --- a/docker/conf/homeserver.yaml +++ b/docker/conf/homeserver.yaml @@ -148,14 +148,6 @@ bcrypt_rounds: 12 allow_guest_access: {{ "True" if SYNAPSE_ALLOW_GUEST else "False" }} enable_group_creation: true -# The list of identity servers trusted to verify third party -# identifiers by this server. -# -# Also defines the ID server which will be called when an account is -# deactivated (one will be picked arbitrarily). -trusted_third_party_id_servers: - - matrix.org - - vector.im ## Metrics ### diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index efb9476cd6..adbb551cee 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -48,7 +48,7 @@ WORKERS_CONFIG = { "app": "synapse.app.user_dir", "listener_resources": ["client"], "endpoint_patterns": [ - "^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$" + "^/_matrix/client/(api/v1|r0|v3|unstable)/user_directory/search$" ], "shared_extra_conf": {"update_user_directory": False}, "worker_extra_conf": "", @@ -85,10 +85,10 @@ WORKERS_CONFIG = { "app": "synapse.app.generic_worker", "listener_resources": ["client"], "endpoint_patterns": [ - "^/_matrix/client/(v2_alpha|r0)/sync$", - "^/_matrix/client/(api/v1|v2_alpha|r0)/events$", - "^/_matrix/client/(api/v1|r0)/initialSync$", - "^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$", + "^/_matrix/client/(v2_alpha|r0|v3)/sync$", + "^/_matrix/client/(api/v1|v2_alpha|r0|v3)/events$", + "^/_matrix/client/(api/v1|r0|v3)/initialSync$", + "^/_matrix/client/(api/v1|r0|v3)/rooms/[^/]+/initialSync$", ], "shared_extra_conf": {}, "worker_extra_conf": "", @@ -146,11 +146,11 @@ WORKERS_CONFIG = { "app": "synapse.app.generic_worker", "listener_resources": ["client"], "endpoint_patterns": [ - "^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact", - "^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send", - "^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$", - "^/_matrix/client/(api/v1|r0|unstable)/join/", - "^/_matrix/client/(api/v1|r0|unstable)/profile/", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/redact", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/send", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/join/", + "^/_matrix/client/(api/v1|r0|v3|unstable)/profile/", ], "shared_extra_conf": {}, "worker_extra_conf": "", @@ -158,7 +158,7 @@ WORKERS_CONFIG = { "frontend_proxy": { "app": "synapse.app.frontend_proxy", "listener_resources": ["client", "replication"], - "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|unstable)/keys/upload"], + "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload"], "shared_extra_conf": {}, 
"worker_extra_conf": ( "worker_main_http_uri: http://127.0.0.1:%d" @@ -474,10 +474,16 @@ def generate_worker_files(environ, config_path: str, data_dir: str): # Determine the load-balancing upstreams to configure nginx_upstream_config = "" + + # At the same time, prepare a list of internal endpoints to healthcheck + # starting with the main process which exists even if no workers do. + healthcheck_urls = ["http://localhost:8080/health"] + for upstream_worker_type, upstream_worker_ports in nginx_upstreams.items(): body = "" for port in upstream_worker_ports: body += " server localhost:%d;\n" % (port,) + healthcheck_urls.append("http://localhost:%d/health" % (port,)) # Add to the list of configured upstreams nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format( @@ -510,6 +516,13 @@ def generate_worker_files(environ, config_path: str, data_dir: str): worker_config=supervisord_config, ) + # healthcheck config + convert( + "/conf/healthcheck.sh.j2", + "/healthcheck.sh", + healthcheck_urls=healthcheck_urls, + ) + # Ensure the logging directory exists log_dir = data_dir + "/logs" if not os.path.exists(log_dir): diff --git a/docs/README.md b/docs/README.md index 6d70f5afff..5222ee5f03 100644 --- a/docs/README.md +++ b/docs/README.md @@ -50,8 +50,10 @@ build the documentation with: mdbook build ``` -The rendered contents will be outputted to a new `book/` directory at the root of the repository. You can -browse the book by opening `book/index.html` in a web browser. +The rendered contents will be outputted to a new `book/` directory at the root of the repository. Please note that +index.html is not built by default, it is created by copying over the file `welcome_and_overview.html` to `index.html` +during deployment. Thus, when running `mdbook serve` locally the book will initially show a 404 in place of the index +due to the above. Do not be alarmed! You can also have mdbook host the docs on a local webserver with hot-reload functionality via: diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 6a6ae92d66..0f1a74134f 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -3,6 +3,7 @@ - [Room Details API](#room-details-api) - [Room Members API](#room-members-api) - [Room State API](#room-state-api) +- [Block Room API](#block-room-api) - [Delete Room API](#delete-room-api) * [Version 1 (old version)](#version-1-old-version) * [Version 2 (new version)](#version-2-new-version) @@ -386,6 +387,83 @@ A response body like the following is returned: } ``` +# Block Room API +The Block Room admin API allows server admins to block and unblock rooms, +and query to see if a given room is blocked. +This API can be used to pre-emptively block a room, even if it's unknown to this +homeserver. Users will be prevented from joining a blocked room. + +## Block or unblock a room + +The API is: + +``` +PUT /_synapse/admin/v1/rooms/<room_id>/block +``` + +with a body of: + +```json +{ + "block": true +} +``` + +A response body like the following is returned: + +```json +{ + "block": true +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `room_id` - The ID of the room. + +The following JSON body parameters are available: + +- `block` - If `true` the room will be blocked and if `false` the room will be unblocked. + +**Response** + +The following fields are possible in the JSON response body: + +- `block` - A boolean. 
`true` if the room is blocked, otherwise `false` + +## Get block status + +The API is: + +``` +GET /_synapse/admin/v1/rooms/<room_id>/block +``` + +A response body like the following is returned: + +```json +{ + "block": true, + "user_id": "<user_id>" +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `room_id` - The ID of the room. + +**Response** + +The following fields are possible in the JSON response body: + +- `block` - A boolean. `true` if the room is blocked, otherwise `false` +- `user_id` - An optional string. If the room is blocked (`block` is `true`) shows + the user who has add the room to blocking list. Otherwise it is not displayed. + # Delete Room API The Delete Room admin API allows server admins to remove rooms from the server diff --git a/docs/jwt.md b/docs/jwt.md index 5be9fd26e3..32f58cc0cb 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -22,8 +22,9 @@ will be removed in a future version of Synapse. The `token` field should include the JSON web token with the following claims: -* The `sub` (subject) claim is required and should encode the local part of the - user ID. +* A claim that encodes the local part of the user ID is required. By default, + the `sub` (subject) claim is used, or a custom claim can be set in the + configuration file. * The expiration time (`exp`), not before time (`nbf`), and issued at (`iat`) claims are optional, but validated if present. * The issuer (`iss`) claim is optional, but required and validated if configured. diff --git a/docs/media_repository.md b/docs/media_repository.md index 99ee8f1ef7..ba17f8a856 100644 --- a/docs/media_repository.md +++ b/docs/media_repository.md @@ -2,29 +2,80 @@ *Synapse implementation-specific details for the media repository* -The media repository is where attachments and avatar photos are stored. -It stores attachment content and thumbnails for media uploaded by local users. -It caches attachment content and thumbnails for media uploaded by remote users. +The media repository + * stores avatars, attachments and their thumbnails for media uploaded by local + users. + * caches avatars, attachments and their thumbnails for media uploaded by remote + users. + * caches resources and thumbnails used for + [URL previews](development/url_previews.md). -## Storage +All media in Matrix can be identified by a unique +[MXC URI](https://spec.matrix.org/latest/client-server-api/#matrix-content-mxc-uris), +consisting of a server name and media ID: +``` +mxc://<server-name>/<media-id> +``` -Each item of media is assigned a `media_id` when it is uploaded. -The `media_id` is a randomly chosen, URL safe 24 character string. +## Local Media +Synapse generates 24 character media IDs for content uploaded by local users. +These media IDs consist of upper and lowercase letters and are case-sensitive. +Other homeserver implementations may generate media IDs differently. -Metadata such as the MIME type, upload time and length are stored in the -sqlite3 database indexed by `media_id`. +Local media is recorded in the `local_media_repository` table, which includes +metadata such as MIME types, upload times and file sizes. +Note that this table is shared by the URL cache, which has a different media ID +scheme. -Content is stored on the filesystem under a `"local_content"` directory. 
+### Paths +A file with media ID `aabbcccccccccccccccccccc` and its `128x96` `image/jpeg` +thumbnail, created by scaling, would be stored at: +``` +local_content/aa/bb/cccccccccccccccccccc +local_thumbnails/aa/bb/cccccccccccccccccccc/128-96-image-jpeg-scale +``` -Thumbnails are stored under a `"local_thumbnails"` directory. +## Remote Media +When media from a remote homeserver is requested from Synapse, it is assigned +a local `filesystem_id`, with the same format as locally-generated media IDs, +as described above. -The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under -`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width -`128` and height `96` and type `"image/jpeg"` is stored under -`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"` +A record of remote media is stored in the `remote_media_cache` table, which +can be used to map remote MXC URIs (server names and media IDs) to local +`filesystem_id`s. -Remote content is cached under `"remote_content"` directory. Each item of -remote content is assigned a local `"filesystem_id"` to ensure that the -directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"` -is appropriate. Thumbnails for remote content are stored under -`"remote_thumbnail/server_name/..."` +### Paths +A file from `matrix.org` with `filesystem_id` `aabbcccccccccccccccccccc` and its +`128x96` `image/jpeg` thumbnail, created by scaling, would be stored at: +``` +remote_content/matrix.org/aa/bb/cccccccccccccccccccc +remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg-scale +``` +Older thumbnails may omit the thumbnailing method: +``` +remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg +``` + +Note that `remote_thumbnail/` does not have an `s`. + +## URL Previews +See [URL Previews](development/url_previews.md) for documentation on the URL preview +process. + +When generating previews for URLs, Synapse may download and cache various +resources, including images. These resources are assigned temporary media IDs +of the form `yyyy-mm-dd_aaaaaaaaaaaaaaaa`, where `yyyy-mm-dd` is the current +date and `aaaaaaaaaaaaaaaa` is a random sequence of 16 case-sensitive letters. + +The metadata for these cached resources is stored in the +`local_media_repository` and `local_media_repository_url_cache` tables. + +Resources for URL previews are deleted after a few days. + +### Paths +The file with media ID `yyyy-mm-dd_aaaaaaaaaaaaaaaa` and its `128x96` +`image/jpeg` thumbnail, created by scaling, would be stored at: +``` +url_cache/yyyy-mm-dd/aaaaaaaaaaaaaaaa +url_cache_thumbnails/yyyy-mm-dd/aaaaaaaaaaaaaaaa/128-96-image-jpeg-scale +``` diff --git a/docs/modules/background_update_controller_callbacks.md b/docs/modules/background_update_controller_callbacks.md new file mode 100644 index 0000000000..b3e7c259f4 --- /dev/null +++ b/docs/modules/background_update_controller_callbacks.md @@ -0,0 +1,71 @@ +# Background update controller callbacks + +Background update controller callbacks allow module developers to control (e.g. rate-limit) +how database background updates are run. A database background update is an operation +Synapse runs on its database in the background after it starts. It's usually used to run +database operations that would take too long if they were run at the same time as schema +updates (which are run on startup) and delay Synapse's startup too much: populating a +table with a big amount of data, adding an index on a big table, deleting superfluous data, +etc. 
+ +Background update controller callbacks can be registered using the module API's +`register_background_update_controller_callbacks` method. Only the first module (in order +of appearance in Synapse's configuration file) calling this method can register background +update controller callbacks, subsequent calls are ignored. + +The available background update controller callbacks are: + +### `on_update` + +_First introduced in Synapse v1.49.0_ + +```python +def on_update(update_name: str, database_name: str, one_shot: bool) -> AsyncContextManager[int] +``` + +Called when about to do an iteration of a background update. The module is given the name +of the update, the name of the database, and a flag to indicate whether the background +update will happen in one go and may take a long time (e.g. creating indices). If this last +argument is set to `False`, the update will be run in batches. + +The module must return an async context manager. It will be entered before Synapse runs a +background update; this should return the desired duration of the iteration, in +milliseconds. + +The context manager will be exited when the iteration completes. Note that the duration +returned by the context manager is a target, and an iteration may take substantially longer +or shorter. If the `one_shot` flag is set to `True`, the duration returned is ignored. + +__Note__: Unlike most module callbacks in Synapse, this one is _synchronous_. This is +because asynchronous operations are expected to be run by the async context manager. + +This callback is required when registering any other background update controller callback. + +### `default_batch_size` + +_First introduced in Synapse v1.49.0_ + +```python +async def default_batch_size(update_name: str, database_name: str) -> int +``` + +Called before the first iteration of a background update, with the name of the update and +of the database. The module must return the number of elements to process in this first +iteration. + +If this callback is not defined, Synapse will use a default value of 100. + +### `min_batch_size` + +_First introduced in Synapse v1.49.0_ + +```python +async def min_batch_size(update_name: str, database_name: str) -> int +``` + +Called before running a new batch for a background update, with the name of the update and +of the database. The module must return an integer representing the minimum number of +elements to process in this iteration. This number must be at least 1, and is used to +ensure that progress is always made. + +If this callback is not defined, Synapse will use a default value of 100. diff --git a/docs/modules/writing_a_module.md b/docs/modules/writing_a_module.md index 7764e06692..e7c0ffad58 100644 --- a/docs/modules/writing_a_module.md +++ b/docs/modules/writing_a_module.md @@ -71,15 +71,15 @@ Modules **must** register their web resources in their `__init__` method. ## Registering a callback Modules can use Synapse's module API to register callbacks. Callbacks are functions that -Synapse will call when performing specific actions. Callbacks must be asynchronous, and -are split in categories. A single module may implement callbacks from multiple categories, -and is under no obligation to implement all callbacks from the categories it registers -callbacks for. +Synapse will call when performing specific actions. Callbacks must be asynchronous (unless +specified otherwise), and are split in categories. 
A single module may implement callbacks +from multiple categories, and is under no obligation to implement all callbacks from the +categories it registers callbacks for. Modules can register callbacks using one of the module API's `register_[...]_callbacks` methods. The callback functions are passed to these methods as keyword arguments, with -the callback name as the argument name and the function as its value. This is demonstrated -in the example below. A `register_[...]_callbacks` method exists for each category. +the callback name as the argument name and the function as its value. A +`register_[...]_callbacks` method exists for each category. Callbacks for each category can be found on their respective page of the [Synapse documentation website](https://matrix-org.github.io/synapse). \ No newline at end of file diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md index d7beacfff3..dc0dfffa21 100644 --- a/docs/password_auth_providers.md +++ b/docs/password_auth_providers.md @@ -1,7 +1,7 @@ <h2 style="color:red"> This page of the Synapse documentation is now deprecated. For up to date documentation on setting up or writing a password auth provider module, please see -<a href="modules.md">this page</a>. +<a href="modules/index.md">this page</a>. </h2> # Password auth provider modules diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index d48c08f1d9..ae476d19ac 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -647,8 +647,8 @@ retention: # #federation_certificate_verification_whitelist: # - lon.example.com -# - *.domain.com -# - *.onion +# - "*.domain.com" +# - "*.onion" # List of custom certificate authorities for federation traffic. # @@ -2039,6 +2039,12 @@ sso: # #algorithm: "provided-by-your-issuer" + # Name of the claim containing a unique identifier for the user. + # + # Optional, defaults to `sub`. + # + #subject_claim: "sub" + # The issuer to validate the "iss" claim against. # # Optional, if provided the "iss" claim will be required and @@ -2360,8 +2366,8 @@ user_directory: # indexes were (re)built was before Synapse 1.44, you'll have to # rebuild the indexes in order to search through all known users. # These indexes are built the first time Synapse starts; admins can - # manually trigger a rebuild following the instructions at - # https://matrix-org.github.io/synapse/latest/user_directory.html + # manually trigger a rebuild via API following the instructions at + # https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/background_updates.html#run # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/docs/turn-howto.md b/docs/turn-howto.md index 06b2ef2c9a..e6812de69e 100644 --- a/docs/turn-howto.md +++ b/docs/turn-howto.md @@ -220,7 +220,7 @@ Here are a few things to try: anyone who has successfully set this up. * Check that you have opened your firewall to allow TCP and UDP traffic to the - TURN ports (normally 3478 and 5479). + TURN ports (normally 3478 and 5349). * Check that you have opened your firewall to allow UDP traffic to the UDP relay ports (49152-65535 by default). 
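Tying together the background update controller callbacks documented above (`on_update`, `default_batch_size`, `min_batch_size`), here is a sketch of a module that registers all three; the registration method name comes from that page, while the class, durations and batch sizes are made up:

```python
import contextlib
from typing import AsyncIterator

class ThrottledBackgroundUpdates:
    def __init__(self, config, api):
        api.register_background_update_controller_callbacks(
            on_update=self.on_update,
            default_batch_size=self.default_batch_size,
            min_batch_size=self.min_batch_size,
        )

    def on_update(self, update_name: str, database_name: str, one_shot: bool):
        # Synchronous on purpose, as the documentation requires; the async
        # work happens inside the returned context manager.
        @contextlib.asynccontextmanager
        async def _iteration() -> AsyncIterator[int]:
            # Ask Synapse to aim for roughly 100 ms of work per iteration.
            yield 100
            # Anything here runs once the iteration finishes, e.g. sleeping
            # to rate-limit before the next batch.

        return _iteration()

    async def default_batch_size(self, update_name: str, database_name: str) -> int:
        return 50

    async def min_batch_size(self, update_name: str, database_name: str) -> int:
        return 1
```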
diff --git a/docs/usage/administration/admin_api/background_updates.md b/docs/usage/administration/admin_api/background_updates.md index b36d7fe398..9f6ac7d567 100644 --- a/docs/usage/administration/admin_api/background_updates.md +++ b/docs/usage/administration/admin_api/background_updates.md @@ -42,7 +42,6 @@ For each update: `average_items_per_ms` how many items are processed per millisecond based on an exponential average. - ## Enabled This API allow pausing background updates. @@ -82,3 +81,29 @@ The API returns the `enabled` param. ``` There is also a `GET` version which returns the `enabled` state. + + +## Run + +This API schedules a specific background update to run. The job starts immediately after calling the API. + + +The API is: + +``` +POST /_synapse/admin/v1/background_updates/start_job +``` + +with the following body: + +```json +{ + "job_name": "populate_stats_process_rooms" +} +``` + +The following JSON body parameters are available: + +- `job_name` - A string which job to run. Valid values are: + - `populate_stats_process_rooms` - Recalculate the stats for all rooms. + - `regenerate_directory` - Recalculate the [user directory](../../../user_directory.md) if it is stale or out of sync. diff --git a/docs/user_directory.md b/docs/user_directory.md index 07fe954891..c4794b04cf 100644 --- a/docs/user_directory.md +++ b/docs/user_directory.md @@ -6,9 +6,9 @@ on this particular server - i.e. ones which your account shares a room with, or who are present in a publicly viewable room present on the server. The directory info is stored in various tables, which can (typically after -DB corruption) get stale or out of sync. If this happens, for now the -solution to fix it is to execute the SQL [here](https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/main/delta/53/user_dir_populate.sql) -and then restart synapse. This should then start a background task to +DB corruption) get stale or out of sync. If this happens, for now the +solution to fix it is to use the [admin API](usage/administration/admin_api/background_updates.md#run) +and execute the job `regenerate_directory`. This should then start a background task to flush the current tables and regenerate the directory. 
Data model diff --git a/docs/workers.md b/docs/workers.md index f88e2c1de3..fd83e2ddeb 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -182,10 +182,10 @@ This worker can handle API requests matching the following regular expressions: # Sync requests - ^/_matrix/client/(v2_alpha|r0)/sync$ - ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ - ^/_matrix/client/(api/v1|r0)/initialSync$ - ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ + ^/_matrix/client/(v2_alpha|r0|v3)/sync$ + ^/_matrix/client/(api/v1|v2_alpha|r0|v3)/events$ + ^/_matrix/client/(api/v1|r0|v3)/initialSync$ + ^/_matrix/client/(api/v1|r0|v3)/rooms/[^/]+/initialSync$ # Federation requests ^/_matrix/federation/v1/event/ @@ -210,46 +210,46 @@ expressions: ^/_matrix/federation/v1/get_groups_publicised$ ^/_matrix/key/v2/query ^/_matrix/federation/unstable/org.matrix.msc2946/spaces/ - ^/_matrix/federation/unstable/org.matrix.msc2946/hierarchy/ + ^/_matrix/federation/(v1|unstable/org.matrix.msc2946)/hierarchy/ # Inbound federation transaction request ^/_matrix/federation/v1/send/ # Client API requests - ^/_matrix/client/(api/v1|r0|unstable)/createRoom$ - ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/createRoom$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/publicRooms$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/joined_members$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/context/.*$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$ ^/_matrix/client/unstable/org.matrix.msc2946/rooms/.*/spaces$ - ^/_matrix/client/unstable/org.matrix.msc2946/rooms/.*/hierarchy$ + ^/_matrix/client/(v1|unstable/org.matrix.msc2946)/rooms/.*/hierarchy$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ - ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ - ^/_matrix/client/(api/v1|r0|unstable)/devices$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/devices$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/keys/changes$ ^/_matrix/client/versions$ - ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ - ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/event/ - ^/_matrix/client/(api/v1|r0|unstable)/joined_rooms$ - ^/_matrix/client/(api/v1|r0|unstable)/search$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_groups$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/publicised_groups$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/publicised_groups/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/search$ # Registration/login requests - ^/_matrix/client/(api/v1|r0|unstable)/login$ - ^/_matrix/client/(r0|unstable)/register$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/login$ + ^/_matrix/client/(r0|v3|unstable)/register$ 
^/_matrix/client/unstable/org.matrix.msc3231/register/org.matrix.msc3231.login.registration_token/validity$ # Event sending requests - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ - ^/_matrix/client/(api/v1|r0|unstable)/join/ - ^/_matrix/client/(api/v1|r0|unstable)/profile/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/redact + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/send + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/join/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/ Additionally, the following REST endpoints can be handled for GET requests: @@ -261,14 +261,14 @@ room must be routed to the same instance. Additionally, care must be taken to ensure that the purge history admin API is not used while pagination requests for the room are in flight: - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$ Additionally, the following endpoints should be included if Synapse is configured to use SSO (you only need to include the ones for whichever SSO provider you're using): # for all SSO providers - ^/_matrix/client/(api/v1|r0|unstable)/login/sso/redirect + ^/_matrix/client/(api/v1|r0|v3|unstable)/login/sso/redirect ^/_synapse/client/pick_idp$ ^/_synapse/client/pick_username ^/_synapse/client/new_user_consent$ @@ -281,7 +281,7 @@ using): ^/_synapse/client/saml2/authn_response$ # CAS requests. - ^/_matrix/client/(api/v1|r0|unstable)/login/cas/ticket$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/login/cas/ticket$ Ensure that all SSO logins go to a single process. For multiple workers not handling the SSO endpoints properly, see @@ -465,7 +465,7 @@ Note that if a reverse proxy is used , then `/_matrix/media/` must be routed for Handles searches in the user directory. It can handle REST endpoints matching the following regular expressions: - ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/user_directory/search$ When using this worker you must also set `update_user_directory: False` in the shared configuration file to stop the main synapse running background @@ -477,12 +477,12 @@ Proxies some frequently-requested client endpoints to add caching and remove load from the main synapse. It can handle REST endpoints matching the following regular expressions: - ^/_matrix/client/(api/v1|r0|unstable)/keys/upload + ^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload If `use_presence` is False in the homeserver config, it can also handle REST endpoints matching the following regular expressions: - ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status + ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/[^/]+/status This "stub" presence handler will pass through `GET` request but make the `PUT` effectively a no-op. 
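The worker documentation above now lists the Matrix v1.1 `/v3` prefix alongside `r0` and `unstable` in its endpoint patterns. When updating a reverse-proxy configuration it can help to sanity-check which paths a worker's patterns capture; a small Python sketch using a few of the sync-worker patterns quoted above (real deployments do this routing in nginx or similar):

```python
import re

SYNC_WORKER_PATTERNS = [
    r"^/_matrix/client/(v2_alpha|r0|v3)/sync$",
    r"^/_matrix/client/(api/v1|v2_alpha|r0|v3)/events$",
    r"^/_matrix/client/(api/v1|r0|v3)/initialSync$",
    r"^/_matrix/client/(api/v1|r0|v3)/rooms/[^/]+/initialSync$",
]
_COMPILED = [re.compile(p) for p in SYNC_WORKER_PATTERNS]

def routes_to_sync_worker(path: str) -> bool:
    """True if the request path matches one of the sync-worker patterns,
    i.e. the reverse proxy should send it to that worker's listener."""
    return any(p.match(path) for p in _COMPILED)

assert routes_to_sync_worker("/_matrix/client/v3/sync")
assert routes_to_sync_worker("/_matrix/client/r0/rooms/!abc:example.org/initialSync")
assert not routes_to_sync_worker("/_matrix/client/v3/login")
```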
diff --git a/mypy.ini b/mypy.ini index b2953974ea..51056a8f64 100644 --- a/mypy.ini +++ b/mypy.ini @@ -33,7 +33,6 @@ exclude = (?x) |synapse/storage/databases/main/event_federation.py |synapse/storage/databases/main/event_push_actions.py |synapse/storage/databases/main/events_bg_updates.py - |synapse/storage/databases/main/events_worker.py |synapse/storage/databases/main/group_server.py |synapse/storage/databases/main/metrics.py |synapse/storage/databases/main/monthly_active_users.py @@ -151,6 +150,9 @@ disallow_untyped_defs = True [mypy-synapse.app.*] disallow_untyped_defs = True +[mypy-synapse.config._base] +disallow_untyped_defs = True + [mypy-synapse.crypto.*] disallow_untyped_defs = True @@ -160,6 +162,12 @@ disallow_untyped_defs = True [mypy-synapse.handlers.*] disallow_untyped_defs = True +[mypy-synapse.metrics.*] +disallow_untyped_defs = True + +[mypy-synapse.module_api.*] +disallow_untyped_defs = True + [mypy-synapse.push.*] disallow_untyped_defs = True @@ -178,6 +186,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.directory] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.events_worker] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True @@ -196,92 +207,11 @@ disallow_untyped_defs = True [mypy-synapse.streams.*] disallow_untyped_defs = True -[mypy-synapse.util.batching_queue] -disallow_untyped_defs = True - -[mypy-synapse.util.caches.cached_call] -disallow_untyped_defs = True - -[mypy-synapse.util.caches.dictionary_cache] -disallow_untyped_defs = True - -[mypy-synapse.util.caches.lrucache] -disallow_untyped_defs = True - -[mypy-synapse.util.caches.response_cache] -disallow_untyped_defs = True - -[mypy-synapse.util.caches.stream_change_cache] -disallow_untyped_defs = True - -[mypy-synapse.util.caches.ttl_cache] -disallow_untyped_defs = True - -[mypy-synapse.util.daemonize] -disallow_untyped_defs = True - -[mypy-synapse.util.file_consumer] -disallow_untyped_defs = True - -[mypy-synapse.util.frozenutils] -disallow_untyped_defs = True - -[mypy-synapse.util.hash] -disallow_untyped_defs = True - -[mypy-synapse.util.httpresourcetree] -disallow_untyped_defs = True - -[mypy-synapse.util.iterutils] -disallow_untyped_defs = True - -[mypy-synapse.util.linked_list] +[mypy-synapse.util.*] disallow_untyped_defs = True -[mypy-synapse.util.logcontext] -disallow_untyped_defs = True - -[mypy-synapse.util.logformatter] -disallow_untyped_defs = True - -[mypy-synapse.util.macaroons] -disallow_untyped_defs = True - -[mypy-synapse.util.manhole] -disallow_untyped_defs = True - -[mypy-synapse.util.module_loader] -disallow_untyped_defs = True - -[mypy-synapse.util.msisdn] -disallow_untyped_defs = True - -[mypy-synapse.util.patch_inline_callbacks] -disallow_untyped_defs = True - -[mypy-synapse.util.ratelimitutils] -disallow_untyped_defs = True - -[mypy-synapse.util.retryutils] -disallow_untyped_defs = True - -[mypy-synapse.util.rlimit] -disallow_untyped_defs = True - -[mypy-synapse.util.stringutils] -disallow_untyped_defs = True - -[mypy-synapse.util.templates] -disallow_untyped_defs = True - -[mypy-synapse.util.threepids] -disallow_untyped_defs = True - -[mypy-synapse.util.wheel_timer] -disallow_untyped_defs = True - -[mypy-synapse.util.versionstring] -disallow_untyped_defs = True +[mypy-synapse.util.caches.treecache] +disallow_untyped_defs = False [mypy-tests.handlers.test_user_directory] disallow_untyped_defs = True @@ -295,6 +225,10 @@ disallow_untyped_defs = True [mypy-tests.rest.client.test_directory] 
disallow_untyped_defs = True +[mypy-tests.federation.transport.test_client] +disallow_untyped_defs = True + + ;; Dependencies without annotations ;; Before ignoring a module, check to see if type stubs are available. ;; The `typeshed` project maintains stubs here: diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 7d38b39e90..53295b58fc 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -24,7 +24,7 @@ set -e # Change to the repository root -cd "$(dirname "$0")/.." +cd "$(dirname $0)/.." # Check for a user-specified Complement checkout if [[ -z "$COMPLEMENT_DIR" ]]; then @@ -61,8 +61,8 @@ cd "$COMPLEMENT_DIR" EXTRA_COMPLEMENT_ARGS="" if [[ -n "$1" ]]; then # A test name regex has been set, supply it to Complement - EXTRA_COMPLEMENT_ARGS=(-run "$1") + EXTRA_COMPLEMENT_ARGS+="-run $1 " fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2403,msc2716 -count=1 "${EXTRA_COMPLEMENT_ARGS[@]}" ./tests/... +go test -v -tags synapse_blacklist,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... diff --git a/setup.py b/setup.py index 0ce8beb004..ad99b3bd2c 100755 --- a/setup.py +++ b/setup.py @@ -119,7 +119,9 @@ CONDITIONAL_REQUIREMENTS["mypy"] = [ # Tests assume that all optional dependencies are installed. # # parameterized_class decorator was introduced in parameterized 0.7.0 -CONDITIONAL_REQUIREMENTS["test"] = ["parameterized>=0.7.0"] +# +# We use `mock` library as that backports `AsyncMock` to Python 3.6 +CONDITIONAL_REQUIREMENTS["test"] = ["parameterized>=0.7.0", "mock>=4.0.0"] CONDITIONAL_REQUIREMENTS["dev"] = ( CONDITIONAL_REQUIREMENTS["lint"] diff --git a/synapse/__init__.py b/synapse/__init__.py index 06b179a7e8..48ac38aec6 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ try: except ImportError: pass -__version__ = "1.47.0rc2" +__version__ = "1.47.1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 4486b3bc7d..f9f9467dc1 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -30,7 +30,8 @@ FEDERATION_UNSTABLE_PREFIX = FEDERATION_PREFIX + "/unstable" STATIC_PREFIX = "/_matrix/static" WEB_CLIENT_PREFIX = "/_matrix/client" SERVER_KEY_V2_PREFIX = "/_matrix/key/v2" -MEDIA_PREFIX = "/_matrix/media/r0" +MEDIA_R0_PREFIX = "/_matrix/media/r0" +MEDIA_V3_PREFIX = "/_matrix/media/v3" LEGACY_MEDIA_PREFIX = "/_matrix/media/v1" diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 573bb487b2..807ee3d46e 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -402,7 +402,7 @@ async def start(hs: "HomeServer") -> None: if hasattr(signal, "SIGHUP"): @wrap_as_background_process("sighup") - def handle_sighup(*args: Any, **kwargs: Any) -> None: + async def handle_sighup(*args: Any, **kwargs: Any) -> None: # Tell systemd our state, if we're using it. This will silently fail if # we're not using systemd. 
sdnotify(b"RELOADING=1") diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 46f0feff70..b4bed5bf40 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -26,7 +26,8 @@ from synapse.api.urls import ( CLIENT_API_PREFIX, FEDERATION_PREFIX, LEGACY_MEDIA_PREFIX, - MEDIA_PREFIX, + MEDIA_R0_PREFIX, + MEDIA_V3_PREFIX, SERVER_KEY_V2_PREFIX, ) from synapse.app import _base @@ -112,6 +113,7 @@ from synapse.storage.databases.main.monthly_active_users import ( ) from synapse.storage.databases.main.presence import PresenceStore from synapse.storage.databases.main.room import RoomWorkerStore +from synapse.storage.databases.main.room_batch import RoomBatchStore from synapse.storage.databases.main.search import SearchStore from synapse.storage.databases.main.session import SessionStore from synapse.storage.databases.main.stats import StatsStore @@ -239,6 +241,7 @@ class GenericWorkerSlavedStore( SlavedEventStore, SlavedKeyStore, RoomWorkerStore, + RoomBatchStore, DirectoryStore, SlavedApplicationServiceStore, SlavedRegistrationStore, @@ -338,7 +341,8 @@ class GenericWorkerServer(HomeServer): resources.update( { - MEDIA_PREFIX: media_repo, + MEDIA_R0_PREFIX: media_repo, + MEDIA_V3_PREFIX: media_repo, LEGACY_MEDIA_PREFIX: media_repo, "/_synapse/admin": admin_resource, } diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7bb3744f04..52541faab2 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -29,7 +29,8 @@ from synapse import events from synapse.api.urls import ( FEDERATION_PREFIX, LEGACY_MEDIA_PREFIX, - MEDIA_PREFIX, + MEDIA_R0_PREFIX, + MEDIA_V3_PREFIX, SERVER_KEY_V2_PREFIX, STATIC_PREFIX, WEB_CLIENT_PREFIX, @@ -193,6 +194,8 @@ class SynapseHomeServer(HomeServer): { "/_matrix/client/api/v1": client_resource, "/_matrix/client/r0": client_resource, + "/_matrix/client/v1": client_resource, + "/_matrix/client/v3": client_resource, "/_matrix/client/unstable": client_resource, "/_matrix/client/v2_alpha": client_resource, "/_matrix/client/versions": client_resource, @@ -244,7 +247,11 @@ class SynapseHomeServer(HomeServer): if self.config.server.enable_media_repo: media_repo = self.get_media_repository_resource() resources.update( - {MEDIA_PREFIX: media_repo, LEGACY_MEDIA_PREFIX: media_repo} + { + MEDIA_R0_PREFIX: media_repo, + MEDIA_V3_PREFIX: media_repo, + LEGACY_MEDIA_PREFIX: media_repo, + } ) elif name == "media": raise ConfigError( diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index d08f6bbd7f..f51b636417 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -231,13 +231,32 @@ class ApplicationServiceApi(SimpleHttpClient): json_body=body, args={"access_token": service.hs_token}, ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "push_bulk to %s succeeded! 
events=%s", + uri, + [event.get("event_id") for event in events], + ) sent_transactions_counter.labels(service.id).inc() sent_events_counter.labels(service.id).inc(len(events)) return True except CodeMessageException as e: - logger.warning("push_bulk to %s received %s", uri, e.code) + logger.warning( + "push_bulk to %s received code=%s msg=%s", + uri, + e.code, + e.msg, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) except Exception as ex: - logger.warning("push_bulk to %s threw exception %s", uri, ex) + logger.warning( + "push_bulk to %s threw exception(%s) %s args=%s", + uri, + type(ex).__name__, + ex, + ex.args, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) failed_transactions_counter.labels(service.id).inc() return False diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 7c4428a138..1265738dc1 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -20,7 +20,18 @@ import os from collections import OrderedDict from hashlib import sha256 from textwrap import dedent -from typing import Any, Iterable, List, MutableMapping, Optional, Union +from typing import ( + Any, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) import attr import jinja2 @@ -78,7 +89,7 @@ CONFIG_FILE_HEADER = """\ """ -def path_exists(file_path): +def path_exists(file_path: str) -> bool: """Check if a file exists Unlike os.path.exists, this throws an exception if there is an error @@ -86,7 +97,7 @@ def path_exists(file_path): the parent dir). Returns: - bool: True if the file exists; False if not. + True if the file exists; False if not. """ try: os.stat(file_path) @@ -102,15 +113,15 @@ class Config: A configuration section, containing configuration keys and values. Attributes: - section (str): The section title of this config object, such as + section: The section title of this config object, such as "tls" or "logger". This is used to refer to it on the root logger (for example, `config.tls.some_option`). Must be defined in subclasses. """ - section = None + section: str - def __init__(self, root_config=None): + def __init__(self, root_config: "RootConfig" = None): self.root = root_config # Get the path to the default Synapse template directory @@ -119,7 +130,7 @@ class Config: ) @staticmethod - def parse_size(value): + def parse_size(value: Union[str, int]) -> int: if isinstance(value, int): return value sizes = {"K": 1024, "M": 1024 * 1024} @@ -162,15 +173,15 @@ class Config: return int(value) * size @staticmethod - def abspath(file_path): + def abspath(file_path: str) -> str: return os.path.abspath(file_path) if file_path else file_path @classmethod - def path_exists(cls, file_path): + def path_exists(cls, file_path: str) -> bool: return path_exists(file_path) @classmethod - def check_file(cls, file_path, config_name): + def check_file(cls, file_path: Optional[str], config_name: str) -> str: if file_path is None: raise ConfigError("Missing config for %s." 
% (config_name,)) try: @@ -183,7 +194,7 @@ class Config: return cls.abspath(file_path) @classmethod - def ensure_directory(cls, dir_path): + def ensure_directory(cls, dir_path: str) -> str: dir_path = cls.abspath(dir_path) os.makedirs(dir_path, exist_ok=True) if not os.path.isdir(dir_path): @@ -191,7 +202,7 @@ class Config: return dir_path @classmethod - def read_file(cls, file_path, config_name): + def read_file(cls, file_path: Any, config_name: str) -> str: """Deprecated: call read_file directly""" return read_file(file_path, (config_name,)) @@ -284,6 +295,9 @@ class Config: return [env.get_template(filename) for filename in filenames] +TRootConfig = TypeVar("TRootConfig", bound="RootConfig") + + class RootConfig: """ Holder of an application's configuration. @@ -308,7 +322,9 @@ class RootConfig: raise Exception("Failed making %s: %r" % (config_class.section, e)) setattr(self, config_class.section, conf) - def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]: + def invoke_all( + self, func_name: str, *args: Any, **kwargs: Any + ) -> MutableMapping[str, Any]: """ Invoke a function on all instantiated config objects this RootConfig is configured to use. @@ -317,6 +333,7 @@ class RootConfig: func_name: Name of function to invoke *args **kwargs + Returns: ordered dictionary of config section name and the result of the function from it. @@ -332,7 +349,7 @@ class RootConfig: return res @classmethod - def invoke_all_static(cls, func_name: str, *args, **kwargs): + def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: any) -> None: """ Invoke a static function on config objects this RootConfig is configured to use. @@ -341,6 +358,7 @@ class RootConfig: func_name: Name of function to invoke *args **kwargs + Returns: ordered dictionary of config section name and the result of the function from it. @@ -351,16 +369,16 @@ class RootConfig: def generate_config( self, - config_dir_path, - data_dir_path, - server_name, - generate_secrets=False, - report_stats=None, - open_private_ports=False, - listeners=None, - tls_certificate_path=None, - tls_private_key_path=None, - ): + config_dir_path: str, + data_dir_path: str, + server_name: str, + generate_secrets: bool = False, + report_stats: Optional[bool] = None, + open_private_ports: bool = False, + listeners: Optional[List[dict]] = None, + tls_certificate_path: Optional[str] = None, + tls_private_key_path: Optional[str] = None, + ) -> str: """ Build a default configuration file @@ -368,27 +386,27 @@ class RootConfig: (eg with --generate_config). Args: - config_dir_path (str): The path where the config files are kept. Used to + config_dir_path: The path where the config files are kept. Used to create filenames for things like the log config and the signing key. - data_dir_path (str): The path where the data files are kept. Used to create + data_dir_path: The path where the data files are kept. Used to create filenames for things like the database and media store. - server_name (str): The server name. Used to initialise the server_name + server_name: The server name. Used to initialise the server_name config param, but also used in the names of some of the config files. - generate_secrets (bool): True if we should generate new secrets for things + generate_secrets: True if we should generate new secrets for things like the macaroon_secret_key. If False, these parameters will be left unset. - report_stats (bool|None): Initial setting for the report_stats setting. 
+ report_stats: Initial setting for the report_stats setting. If None, report_stats will be left unset. - open_private_ports (bool): True to leave private ports (such as the non-TLS + open_private_ports: True to leave private ports (such as the non-TLS HTTP listener) open to the internet. - listeners (list(dict)|None): A list of descriptions of the listeners - synapse should start with each of which specifies a port (str), a list of + listeners: A list of descriptions of the listeners synapse should + start with each of which specifies a port (int), a list of resources (list(str)), tls (bool) and type (str). For example: [{ "port": 8448, @@ -403,16 +421,12 @@ class RootConfig: "type": "http", }], + tls_certificate_path: The path to the tls certificate. - database (str|None): The database type to configure, either `psycog2` - or `sqlite3`. - - tls_certificate_path (str|None): The path to the tls certificate. - - tls_private_key_path (str|None): The path to the tls private key. + tls_private_key_path: The path to the tls private key. Returns: - str: the yaml config file + The yaml config file """ return CONFIG_FILE_HEADER + "\n\n".join( @@ -432,12 +446,15 @@ class RootConfig: ) @classmethod - def load_config(cls, description, argv): + def load_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> TRootConfig: """Parse the commandline and config files Doesn't support config-file-generation: used by the worker apps. - Returns: Config object. + Returns: + Config object. """ config_parser = argparse.ArgumentParser(description=description) cls.add_arguments_to_parser(config_parser) @@ -446,7 +463,7 @@ class RootConfig: return obj @classmethod - def add_arguments_to_parser(cls, config_parser): + def add_arguments_to_parser(cls, config_parser: argparse.ArgumentParser) -> None: """Adds all the config flags to an ArgumentParser. Doesn't support config-file-generation: used by the worker apps. @@ -454,7 +471,7 @@ class RootConfig: Used for workers where we want to add extra flags/subcommands. Args: - config_parser (ArgumentParser): App description + config_parser: App description """ config_parser.add_argument( @@ -477,7 +494,9 @@ class RootConfig: cls.invoke_all_static("add_arguments", config_parser) @classmethod - def load_config_with_parser(cls, parser, argv): + def load_config_with_parser( + cls: Type[TRootConfig], parser: argparse.ArgumentParser, argv: List[str] + ) -> Tuple[TRootConfig, argparse.Namespace]: """Parse the commandline and config files with the given parser Doesn't support config-file-generation: used by the worker apps. @@ -485,13 +504,12 @@ class RootConfig: Used for workers where we want to add extra flags/subcommands. Args: - parser (ArgumentParser) - argv (list[str]) + parser + argv Returns: - tuple[HomeServerConfig, argparse.Namespace]: Returns the parsed - config object and the parsed argparse.Namespace object from - `parser.parse_args(..)` + Returns the parsed config object and the parsed argparse.Namespace + object from parser.parse_args(..)` """ obj = cls() @@ -520,12 +538,15 @@ class RootConfig: return obj, config_args @classmethod - def load_or_generate_config(cls, description, argv): + def load_or_generate_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> Optional[TRootConfig]: """Parse the commandline and config files Supports generation of config files, so is used for the main homeserver app. 
- Returns: Config object, or None if --generate-config or --generate-keys was set + Returns: + Config object, or None if --generate-config or --generate-keys was set """ parser = argparse.ArgumentParser(description=description) parser.add_argument( @@ -680,16 +701,21 @@ class RootConfig: return obj - def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None): + def parse_config_dict( + self, + config_dict: Dict[str, Any], + config_dir_path: Optional[str] = None, + data_dir_path: Optional[str] = None, + ) -> None: """Read the information from the config dict into this Config object. Args: - config_dict (dict): Configuration data, as read from the yaml + config_dict: Configuration data, as read from the yaml - config_dir_path (str): The path where the config files are kept. Used to + config_dir_path: The path where the config files are kept. Used to create filenames for things like the log config and the signing key. - data_dir_path (str): The path where the data files are kept. Used to create + data_dir_path: The path where the data files are kept. Used to create filenames for things like the database and media store. """ self.invoke_all( @@ -699,17 +725,20 @@ class RootConfig: data_dir_path=data_dir_path, ) - def generate_missing_files(self, config_dict, config_dir_path): + def generate_missing_files( + self, config_dict: Dict[str, Any], config_dir_path: str + ) -> None: self.invoke_all("generate_files", config_dict, config_dir_path) -def read_config_files(config_files): +def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: """Read the config files into a dict Args: - config_files (iterable[str]): A list of the config files to read + config_files: A list of the config files to read - Returns: dict + Returns: + The configuration dictionary. """ specified_config = {} for config_file in config_files: @@ -733,17 +762,17 @@ def read_config_files(config_files): return specified_config -def find_config_files(search_paths): +def find_config_files(search_paths: List[str]) -> List[str]: """Finds config files using a list of search paths. If a path is a file then that file path is added to the list. If a search path is a directory then all the "*.yaml" files in that directory are added to the list in sorted order. Args: - search_paths(list(str)): A list of paths to search. + search_paths: A list of paths to search. Returns: - list(str): A list of file paths. + A list of file paths. """ config_files = [] @@ -777,7 +806,7 @@ def find_config_files(search_paths): return config_files -@attr.s +@attr.s(auto_attribs=True) class ShardedWorkerHandlingConfig: """Algorithm for choosing which instance is responsible for handling some sharded work. @@ -787,7 +816,7 @@ class ShardedWorkerHandlingConfig: below). 
""" - instances = attr.ib(type=List[str]) + instances: List[str] def should_handle(self, instance_name: str, key: str) -> bool: """Whether this instance is responsible for handling the given key.""" diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index c1d9069798..1eb5f5a68c 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -1,4 +1,18 @@ -from typing import Any, Iterable, List, Optional +import argparse +from typing import ( + Any, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) + +import jinja2 from synapse.config import ( account_validity, @@ -19,6 +33,7 @@ from synapse.config import ( logger, metrics, modules, + oembed, oidc, password_auth_providers, push, @@ -27,6 +42,7 @@ from synapse.config import ( registration, repository, retention, + room, room_directory, saml2, server, @@ -51,7 +67,9 @@ MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str MISSING_REPORT_STATS_SPIEL: str MISSING_SERVER_NAME: str -def path_exists(file_path: str): ... +def path_exists(file_path: str) -> bool: ... + +TRootConfig = TypeVar("TRootConfig", bound="RootConfig") class RootConfig: server: server.ServerConfig @@ -61,6 +79,7 @@ class RootConfig: logging: logger.LoggingConfig ratelimiting: ratelimiting.RatelimitConfig media: repository.ContentRepositoryConfig + oembed: oembed.OembedConfig captcha: captcha.CaptchaConfig voip: voip.VoipConfig registration: registration.RegistrationConfig @@ -80,6 +99,7 @@ class RootConfig: authproviders: password_auth_providers.PasswordAuthProviderConfig push: push.PushConfig spamchecker: spam_checker.SpamCheckerConfig + room: room.RoomConfig groups: groups.GroupsConfig userdirectory: user_directory.UserDirectoryConfig consent: consent.ConsentConfig @@ -87,72 +107,85 @@ class RootConfig: servernotices: server_notices.ServerNoticesConfig roomdirectory: room_directory.RoomDirectoryConfig thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig - tracer: tracer.TracerConfig + tracing: tracer.TracerConfig redis: redis.RedisConfig modules: modules.ModulesConfig caches: cache.CacheConfig federation: federation.FederationConfig retention: retention.RetentionConfig - config_classes: List = ... + config_classes: List[Type["Config"]] = ... def __init__(self) -> None: ... - def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ... + def invoke_all( + self, func_name: str, *args: Any, **kwargs: Any + ) -> MutableMapping[str, Any]: ... @classmethod def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ... - def __getattr__(self, item: str): ... def parse_config_dict( self, - config_dict: Any, - config_dir_path: Optional[Any] = ..., - data_dir_path: Optional[Any] = ..., + config_dict: Dict[str, Any], + config_dir_path: Optional[str] = ..., + data_dir_path: Optional[str] = ..., ) -> None: ... - read_config: Any = ... def generate_config( self, config_dir_path: str, data_dir_path: str, server_name: str, generate_secrets: bool = ..., - report_stats: Optional[str] = ..., + report_stats: Optional[bool] = ..., open_private_ports: bool = ..., listeners: Optional[Any] = ..., - database_conf: Optional[Any] = ..., tls_certificate_path: Optional[str] = ..., tls_private_key_path: Optional[str] = ..., - ): ... + ) -> str: ... @classmethod - def load_or_generate_config(cls, description: Any, argv: Any): ... + def load_or_generate_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> Optional[TRootConfig]: ... 
@classmethod - def load_config(cls, description: Any, argv: Any): ... + def load_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> TRootConfig: ... @classmethod - def add_arguments_to_parser(cls, config_parser: Any) -> None: ... + def add_arguments_to_parser( + cls, config_parser: argparse.ArgumentParser + ) -> None: ... @classmethod - def load_config_with_parser(cls, parser: Any, argv: Any): ... + def load_config_with_parser( + cls: Type[TRootConfig], parser: argparse.ArgumentParser, argv: List[str] + ) -> Tuple[TRootConfig, argparse.Namespace]: ... def generate_missing_files( self, config_dict: dict, config_dir_path: str ) -> None: ... class Config: root: RootConfig + default_template_dir: str def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ... - def __getattr__(self, item: str, from_root: bool = ...): ... @staticmethod - def parse_size(value: Any): ... + def parse_size(value: Union[str, int]) -> int: ... @staticmethod - def parse_duration(value: Any): ... + def parse_duration(value: Union[str, int]) -> int: ... @staticmethod - def abspath(file_path: Optional[str]): ... + def abspath(file_path: Optional[str]) -> str: ... @classmethod - def path_exists(cls, file_path: str): ... + def path_exists(cls, file_path: str) -> bool: ... @classmethod - def check_file(cls, file_path: str, config_name: str): ... + def check_file(cls, file_path: str, config_name: str) -> str: ... @classmethod - def ensure_directory(cls, dir_path: str): ... + def ensure_directory(cls, dir_path: str) -> str: ... @classmethod - def read_file(cls, file_path: str, config_name: str): ... + def read_file(cls, file_path: str, config_name: str) -> str: ... + def read_template(self, filenames: str) -> jinja2.Template: ... + def read_templates( + self, + filenames: List[str], + custom_template_directories: Optional[Iterable[str]] = None, + ) -> List[jinja2.Template]: ... -def read_config_files(config_files: List[str]): ... -def find_config_files(search_paths: List[str]): ... +def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: ... +def find_config_files(search_paths: List[str]) -> List[str]: ... class ShardedWorkerHandlingConfig: instances: List[str] diff --git a/synapse/config/cache.py b/synapse/config/cache.py index d119427ad8..f054455534 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -15,7 +15,7 @@ import os import re import threading -from typing import Callable, Dict +from typing import Callable, Dict, Optional from synapse.python_dependencies import DependencyException, check_requirements @@ -217,7 +217,7 @@ class CacheConfig(Config): expiry_time = cache_config.get("expiry_time") if expiry_time: - self.expiry_time_msec = self.parse_duration(expiry_time) + self.expiry_time_msec: Optional[int] = self.parse_duration(expiry_time) else: self.expiry_time_msec = None diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index afd65fecd3..510b647c63 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -137,33 +137,14 @@ class EmailConfig(Config): if self.root.registration.account_threepid_delegate_email else ThreepidBehaviour.LOCAL ) - # Prior to Synapse v1.4.0, there was another option that defined whether Synapse would - # use an identity server to password reset tokens on its behalf. We now warn the user - # if they have this set and tell them to use the updated option, while using a default - # identity server in the process. 
- self.using_identity_server_from_trusted_list = False - if ( - not self.root.registration.account_threepid_delegate_email - and config.get("trust_identity_server_for_password_resets", False) is True - ): - # Use the first entry in self.trusted_third_party_id_servers instead - if self.trusted_third_party_id_servers: - # XXX: It's a little confusing that account_threepid_delegate_email is modified - # both in RegistrationConfig and here. We should factor this bit out - first_trusted_identity_server = self.trusted_third_party_id_servers[0] - - # trusted_third_party_id_servers does not contain a scheme whereas - # account_threepid_delegate_email is expected to. Presume https - self.root.registration.account_threepid_delegate_email = ( - "https://" + first_trusted_identity_server - ) - self.using_identity_server_from_trusted_list = True - else: - raise ConfigError( - "Attempted to use an identity server from" - '"trusted_third_party_id_servers" but it is empty.' - ) + if config.get("trust_identity_server_for_password_resets"): + raise ConfigError( + 'The config option "trust_identity_server_for_password_resets" ' + 'has been replaced by "account_threepid_delegate". ' + "Please consult the sample config at docs/sample_config.yaml for " + "details and update your config file." + ) self.local_threepid_handling_disabled_due_to_email_config = False if ( diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 9d295f5856..24c3ef01fc 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -31,6 +31,8 @@ class JWTConfig(Config): self.jwt_secret = jwt_config["secret"] self.jwt_algorithm = jwt_config["algorithm"] + self.jwt_subject_claim = jwt_config.get("subject_claim", "sub") + # The issuer and audiences are optional, if provided, it is asserted # that the claims exist on the JWT. self.jwt_issuer = jwt_config.get("issuer") @@ -46,6 +48,7 @@ class JWTConfig(Config): self.jwt_enabled = False self.jwt_secret = None self.jwt_algorithm = None + self.jwt_subject_claim = None self.jwt_issuer = None self.jwt_audiences = None @@ -88,6 +91,12 @@ class JWTConfig(Config): # #algorithm: "provided-by-your-issuer" + # Name of the claim containing a unique identifier for the user. + # + # Optional, defaults to `sub`. + # + #subject_claim: "sub" + # The issuer to validate the "iss" claim against. 
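[Editor's note, not part of the diff: the new `subject_claim` option above lets JWT login read the user's identifier from a claim other than the default `sub`. A hedged sketch of how such a configurable claim might be extracted with PyJWT; the function name and error handling are illustrative and are not Synapse's actual implementation:]

```python
# Illustrative sketch only; assumes the PyJWT package (`import jwt`).
import jwt


def localpart_from_token(
    token: str, secret: str, algorithm: str, subject_claim: str = "sub"
) -> str:
    """Decode a JWT and pull the user identifier from the configured claim."""
    claims = jwt.decode(token, secret, algorithms=[algorithm])
    localpart = claims.get(subject_claim)
    if localpart is None:
        raise ValueError(f"JWT is missing the configured '{subject_claim}' claim")
    return localpart
```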
# # Optional, if provided the "iss" claim will be required and diff --git a/synapse/config/key.py b/synapse/config/key.py index 015dbb8a67..035ee2416b 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -16,6 +16,7 @@ import hashlib import logging import os +from typing import Any, Dict import attr import jsonschema @@ -312,7 +313,7 @@ class KeyConfig(Config): ) return keys - def generate_files(self, config, config_dir_path): + def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None: if "signing_key" in config: return diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 5252e61a99..63aab0babe 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -18,7 +18,7 @@ import os import sys import threading from string import Template -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict import yaml from zope.interface import implementer @@ -185,7 +185,7 @@ class LoggingConfig(Config): help=argparse.SUPPRESS, ) - def generate_files(self, config, config_dir_path): + def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None: log_config = config.get("log_config") if log_config and not os.path.exists(log_config): log_file = self.abspath("homeserver.log") diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 5379e80715..1ddad7cb70 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional from synapse.api.constants import RoomCreationPreset from synapse.config._base import Config, ConfigError @@ -39,9 +40,7 @@ class RegistrationConfig(Config): self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) - self.trusted_third_party_id_servers = config.get( - "trusted_third_party_id_servers", ["matrix.org", "vector.im"] - ) + account_threepid_delegates = config.get("account_threepid_delegates") or {} self.account_threepid_delegate_email = account_threepid_delegates.get("email") self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn") @@ -114,26 +113,25 @@ class RegistrationConfig(Config): session_lifetime = self.parse_duration(session_lifetime) self.session_lifetime = session_lifetime - # The `access_token_lifetime` applies for tokens that can be renewed - # using a refresh token, as per MSC2918. If it is `None`, the refresh - # token mechanism is disabled. - # - # Since it is incompatible with the `session_lifetime` mechanism, it is set to - # `None` by default if a `session_lifetime` is set. - access_token_lifetime = config.get( - "access_token_lifetime", "5m" if session_lifetime is None else None + # The `refreshable_access_token_lifetime` applies for tokens that can be renewed + # using a refresh token, as per MSC2918. + # If it is `None`, the refresh token mechanism is disabled. + refreshable_access_token_lifetime = config.get( + "refreshable_access_token_lifetime", + "5m", ) - if access_token_lifetime is not None: - access_token_lifetime = self.parse_duration(access_token_lifetime) - self.access_token_lifetime = access_token_lifetime - - if session_lifetime is not None and access_token_lifetime is not None: - raise ConfigError( - "The refresh token mechanism is incompatible with the " - "`session_lifetime` option. 
Consider disabling the " - "`session_lifetime` option or disabling the refresh token " - "mechanism by removing the `access_token_lifetime` option." + if refreshable_access_token_lifetime is not None: + refreshable_access_token_lifetime = self.parse_duration( + refreshable_access_token_lifetime ) + self.refreshable_access_token_lifetime: Optional[ + int + ] = refreshable_access_token_lifetime + + refresh_token_lifetime = config.get("refresh_token_lifetime") + if refresh_token_lifetime is not None: + refresh_token_lifetime = self.parse_duration(refresh_token_lifetime) + self.refresh_token_lifetime: Optional[int] = refresh_token_lifetime # The fallback template used for authenticating using a registration token self.registration_token_template = self.read_template("registration_token.html") diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 56981cac79..57316c59b6 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -1,4 +1,5 @@ # Copyright 2018 New Vector Ltd +# Copyright 2021 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + +from synapse.types import JsonDict from synapse.util import glob_to_regex from ._base import Config, ConfigError @@ -20,7 +24,7 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): section = "roomdirectory" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: self.enable_room_list_search = config.get("enable_room_list_search", True) alias_creation_rules = config.get("alias_creation_rules") @@ -47,7 +51,7 @@ class RoomDirectoryConfig(Config): _RoomDirectoryRule("room_list_publication_rules", {"action": "allow"}) ] - def generate_config_section(self, config_dir_path, server_name, **kwargs): + def generate_config_section(self, config_dir_path, server_name, **kwargs) -> str: return """ # Uncomment to disable searching the public room list. When disabled # blocks searching local and remote room lists for local and remote @@ -113,16 +117,16 @@ class RoomDirectoryConfig(Config): # action: allow """ - def is_alias_creation_allowed(self, user_id, room_id, alias): + def is_alias_creation_allowed(self, user_id: str, room_id: str, alias: str) -> bool: """Checks if the given user is allowed to create the given alias Args: - user_id (str) - room_id (str) - alias (str) + user_id: The user to check. + room_id: The room ID for the alias. + alias: The alias being created. Returns: - boolean: True if user is allowed to create the alias + True if user is allowed to create the alias """ for rule in self._alias_creation_rules: if rule.matches(user_id, room_id, [alias]): @@ -130,16 +134,18 @@ class RoomDirectoryConfig(Config): return False - def is_publishing_room_allowed(self, user_id, room_id, aliases): + def is_publishing_room_allowed( + self, user_id: str, room_id: str, aliases: List[str] + ) -> bool: """Checks if the given user is allowed to publish the room Args: - user_id (str) - room_id (str) - aliases (list[str]): any local aliases associated with the room + user_id: The user ID publishing the room. + room_id: The room being published. 
+ aliases: any local aliases associated with the room Returns: - boolean: True if user can publish room + True if user can publish room """ for rule in self._room_list_publication_rules: if rule.matches(user_id, room_id, aliases): @@ -153,11 +159,11 @@ class _RoomDirectoryRule: creating an alias or publishing a room. """ - def __init__(self, option_name, rule): + def __init__(self, option_name: str, rule: JsonDict): """ Args: - option_name (str): Name of the config option this rule belongs to - rule (dict): The rule as specified in the config + option_name: Name of the config option this rule belongs to + rule: The rule as specified in the config """ action = rule["action"] @@ -181,18 +187,18 @@ class _RoomDirectoryRule: except Exception as e: raise ConfigError("Failed to parse glob into regex") from e - def matches(self, user_id, room_id, aliases): + def matches(self, user_id: str, room_id: str, aliases: List[str]) -> bool: """Tests if this rule matches the given user_id, room_id and aliases. Args: - user_id (str) - room_id (str) - aliases (list[str]): The associated aliases to the room. Will be a - single element for testing alias creation, and can be empty for - testing room publishing. + user_id: The user ID to check. + room_id: The room ID to check. + aliases: The associated aliases to the room. Will be a single element + for testing alias creation, and can be empty for testing room + publishing. Returns: - boolean + True if the rule matches. """ # Note: The regexes are anchored at both ends diff --git a/synapse/config/server.py b/synapse/config/server.py index 7bc0030a9e..8445e9dd05 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -421,7 +421,7 @@ class ServerConfig(Config): # before redacting them. redaction_retention_period = config.get("redaction_retention_period", "7d") if redaction_retention_period is not None: - self.redaction_retention_period = self.parse_duration( + self.redaction_retention_period: Optional[int] = self.parse_duration( redaction_retention_period ) else: @@ -430,7 +430,7 @@ class ServerConfig(Config): # How long to keep entries in the `users_ips` table. user_ips_max_age = config.get("user_ips_max_age", "28d") if user_ips_max_age is not None: - self.user_ips_max_age = self.parse_duration(user_ips_max_age) + self.user_ips_max_age: Optional[int] = self.parse_duration(user_ips_max_age) else: self.user_ips_max_age = None diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 6227434bac..4ca111618f 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -14,7 +14,6 @@ import logging import os -from datetime import datetime from typing import List, Optional, Pattern from OpenSSL import SSL, crypto @@ -133,55 +132,6 @@ class TlsConfig(Config): self.tls_certificate: Optional[crypto.X509] = None self.tls_private_key: Optional[crypto.PKey] = None - def is_disk_cert_valid(self, allow_self_signed=True): - """ - Is the certificate we have on disk valid, and if so, for how long? - - Args: - allow_self_signed (bool): Should we allow the certificate we - read to be self signed? - - Returns: - int: Days remaining of certificate validity. - None: No certificate exists. 
- """ - if not os.path.exists(self.tls_certificate_file): - return None - - try: - with open(self.tls_certificate_file, "rb") as f: - cert_pem = f.read() - except Exception as e: - raise ConfigError( - "Failed to read existing certificate file %s: %s" - % (self.tls_certificate_file, e) - ) - - try: - tls_certificate = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) - except Exception as e: - raise ConfigError( - "Failed to parse existing certificate file %s: %s" - % (self.tls_certificate_file, e) - ) - - if not allow_self_signed: - if tls_certificate.get_subject() == tls_certificate.get_issuer(): - raise ValueError( - "TLS Certificate is self signed, and this is not permitted" - ) - - # YYYYMMDDhhmmssZ -- in UTC - expiry_data = tls_certificate.get_notAfter() - if expiry_data is None: - raise ValueError( - "TLS Certificate has no expiry date, and this is not permitted" - ) - expires_on = datetime.strptime(expiry_data.decode("ascii"), "%Y%m%d%H%M%SZ") - now = datetime.utcnow() - days_remaining = (expires_on - now).days - return days_remaining - def read_certificate_from_disk(self): """ Read the certificates and private key from disk. @@ -263,8 +213,8 @@ class TlsConfig(Config): # #federation_certificate_verification_whitelist: # - lon.example.com - # - *.domain.com - # - *.onion + # - "*.domain.com" + # - "*.onion" # List of custom certificate authorities for federation traffic. # @@ -295,7 +245,7 @@ class TlsConfig(Config): cert_path = self.tls_certificate_file logger.info("Loading TLS certificate from %s", cert_path) cert_pem = self.read_file(cert_path, "tls_certificate_path") - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem.encode()) return cert diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 2552f688d0..6d6678c7e4 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -53,8 +53,8 @@ class UserDirectoryConfig(Config): # indexes were (re)built was before Synapse 1.44, you'll have to # rebuild the indexes in order to search through all known users. # These indexes are built the first time Synapse starts; admins can - # manually trigger a rebuild following the instructions at - # https://matrix-org.github.io/synapse/latest/user_directory.html + # manually trigger a rebuild via API following the instructions at + # https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/background_updates.html#run # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index f641ab7ef5..993b04099e 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -1,5 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017, 2018 New Vector Ltd +# Copyright 2014-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -120,16 +119,6 @@ class VerifyJsonRequest: key_ids=key_ids, ) - def to_fetch_key_request(self) -> "_FetchKeyRequest": - """Create a key fetch request for all keys needed to satisfy the - verification request. 
- """ - return _FetchKeyRequest( - server_name=self.server_name, - minimum_valid_until_ts=self.minimum_valid_until_ts, - key_ids=self.key_ids, - ) - class KeyLookupError(ValueError): pass @@ -179,8 +168,22 @@ class Keyring: clock=hs.get_clock(), process_batch_callback=self._inner_fetch_key_requests, ) - self.verify_key = get_verify_key(hs.signing_key) - self.hostname = hs.hostname + + self._hostname = hs.hostname + + # build a FetchKeyResult for each of our own keys, to shortcircuit the + # fetcher. + self._local_verify_keys: Dict[str, FetchKeyResult] = {} + for key_id, key in hs.config.key.old_signing_keys.items(): + self._local_verify_keys[key_id] = FetchKeyResult( + verify_key=key, valid_until_ts=key.expired_ts + ) + + vk = get_verify_key(hs.signing_key) + self._local_verify_keys[f"{vk.alg}:{vk.version}"] = FetchKeyResult( + verify_key=vk, + valid_until_ts=2 ** 63, # fake future timestamp + ) async def verify_json_for_server( self, @@ -267,22 +270,32 @@ class Keyring: Codes.UNAUTHORIZED, ) - # If we are the originating server don't fetch verify key for self over federation - if verify_request.server_name == self.hostname: - await self._process_json(self.verify_key, verify_request) - return + found_keys: Dict[str, FetchKeyResult] = {} - # Add the keys we need to verify to the queue for retrieval. We queue - # up requests for the same server so we don't end up with many in flight - # requests for the same keys. - key_request = verify_request.to_fetch_key_request() - found_keys_by_server = await self._server_queue.add_to_queue( - key_request, key=verify_request.server_name - ) + # If we are the originating server, short-circuit the key-fetch for any keys + # we already have + if verify_request.server_name == self._hostname: + for key_id in verify_request.key_ids: + if key_id in self._local_verify_keys: + found_keys[key_id] = self._local_verify_keys[key_id] + + key_ids_to_find = set(verify_request.key_ids) - found_keys.keys() + if key_ids_to_find: + # Add the keys we need to verify to the queue for retrieval. We queue + # up requests for the same server so we don't end up with many in flight + # requests for the same keys. + key_request = _FetchKeyRequest( + server_name=verify_request.server_name, + minimum_valid_until_ts=verify_request.minimum_valid_until_ts, + key_ids=list(key_ids_to_find), + ) + found_keys_by_server = await self._server_queue.add_to_queue( + key_request, key=verify_request.server_name + ) - # Since we batch up requests the returned set of keys may contain keys - # from other servers, so we pull out only the ones we care about.s - found_keys = found_keys_by_server.get(verify_request.server_name, {}) + # Since we batch up requests the returned set of keys may contain keys + # from other servers, so we pull out only the ones we care about. + found_keys.update(found_keys_by_server.get(verify_request.server_name, {})) # Verify each signature we got valid keys for, raising if we can't # verify any of them. @@ -654,21 +667,25 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): perspective_name, ) + request: JsonDict = {} + for queue_value in keys_to_fetch: + # there may be multiple requests for each server, so we have to merge + # them intelligently. 
+ request_for_server = { + key_id: { + "minimum_valid_until_ts": queue_value.minimum_valid_until_ts, + } + for key_id in queue_value.key_ids + } + request.setdefault(queue_value.server_name, {}).update(request_for_server) + + logger.debug("Request to notary server %s: %s", perspective_name, request) + try: query_response = await self.client.post_json( destination=perspective_name, path="/_matrix/key/v2/query", - data={ - "server_keys": { - queue_value.server_name: { - key_id: { - "minimum_valid_until_ts": queue_value.minimum_valid_until_ts, - } - for key_id in queue_value.key_ids - } - for queue_value in keys_to_fetch - } - }, + data={"server_keys": request}, ) except (NotRetryingDestination, RequestSendFailed) as e: # these both have str() representations which we can't really improve upon @@ -676,6 +693,10 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): except HttpResponseException as e: raise KeyLookupError("Remote server returned an error: %s" % (e,)) + logger.debug( + "Response from notary server %s: %s", perspective_name, query_response + ) + keys: Dict[str, Dict[str, FetchKeyResult]] = {} added_keys: List[Tuple[str, str, FetchKeyResult]] = [] diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index d7527008c4..f251402ed8 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -322,6 +322,11 @@ class _AsyncEventContextImpl(EventContext): attributes by loading from the database. """ if self.state_group is None: + # No state group means the event is an outlier. Usually the state_ids dicts are also + # pre-set to empty dicts, but they get reset when the context is serialized, so set + # them to empty dicts again here. + self._current_state_ids = {} + self._prev_state_ids = {} return current_state_ids = await self._storage.state.get_state_ids_for_group( diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 6fa631aa1d..e5967c995e 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -392,15 +393,16 @@ class EventClientSerializer: self, event: Union[JsonDict, EventBase], time_now: int, - bundle_aggregations: bool = True, + bundle_relations: bool = True, **kwargs: Any, ) -> JsonDict: """Serializes a single event. Args: - event + event: The event being serialized. time_now: The current time in milliseconds - bundle_aggregations: Whether to bundle in related events + bundle_relations: Whether to include the bundled relations for this + event. **kwargs: Arguments to pass to `serialize_event` Returns: @@ -410,77 +412,93 @@ class EventClientSerializer: if not isinstance(event, EventBase): return event - event_id = event.event_id serialized_event = serialize_event(event, time_now, **kwargs) # If MSC1849 is enabled then we need to look if there are any relations # we need to bundle in with the event. 
# Do not bundle relations if the event has been redacted if not event.internal_metadata.is_redacted() and ( - self._msc1849_enabled and bundle_aggregations + self._msc1849_enabled and bundle_relations ): - annotations = await self.store.get_aggregation_groups_for_event(event_id) - references = await self.store.get_relations_for_event( - event_id, RelationTypes.REFERENCE, direction="f" - ) - - if annotations.chunk: - r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.ANNOTATION] = annotations.to_dict() - - if references.chunk: - r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.REFERENCE] = references.to_dict() - - edit = None - if event.type == EventTypes.Message: - edit = await self.store.get_applicable_edit(event_id) - - if edit: - # If there is an edit replace the content, preserving existing - # relations. - - # Ensure we take copies of the edit content, otherwise we risk modifying - # the original event. - edit_content = edit.content.copy() - - # Unfreeze the event content if necessary, so that we may modify it below - edit_content = unfreeze(edit_content) - serialized_event["content"] = edit_content.get("m.new_content", {}) - - # Check for existing relations - relations = event.content.get("m.relates_to") - if relations: - # Keep the relations, ensuring we use a dict copy of the original - serialized_event["content"]["m.relates_to"] = relations.copy() - else: - serialized_event["content"].pop("m.relates_to", None) - - r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.REPLACE] = { - "event_id": edit.event_id, - "origin_server_ts": edit.origin_server_ts, - "sender": edit.sender, - } - - # If this event is the start of a thread, include a summary of the replies. - if self._msc3440_enabled: - ( - thread_count, - latest_thread_event, - ) = await self.store.get_thread_summary(event_id) - if latest_thread_event: - r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.THREAD] = { - # Don't bundle aggregations as this could recurse forever. - "latest_event": await self.serialize_event( - latest_thread_event, time_now, bundle_aggregations=False - ), - "count": thread_count, - } + await self._injected_bundled_relations(event, time_now, serialized_event) return serialized_event + async def _injected_bundled_relations( + self, event: EventBase, time_now: int, serialized_event: JsonDict + ) -> None: + """Potentially injects bundled relations into the unsigned portion of the serialized event. + + Args: + event: The event being serialized. + time_now: The current time in milliseconds + serialized_event: The serialized event which may be modified. + + """ + event_id = event.event_id + + # The bundled relations to include. + relations = {} + + annotations = await self.store.get_aggregation_groups_for_event(event_id) + if annotations.chunk: + relations[RelationTypes.ANNOTATION] = annotations.to_dict() + + references = await self.store.get_relations_for_event( + event_id, RelationTypes.REFERENCE, direction="f" + ) + if references.chunk: + relations[RelationTypes.REFERENCE] = references.to_dict() + + edit = None + if event.type == EventTypes.Message: + edit = await self.store.get_applicable_edit(event_id) + + if edit: + # If there is an edit replace the content, preserving existing + # relations. + + # Ensure we take copies of the edit content, otherwise we risk modifying + # the original event. 
+ edit_content = edit.content.copy() + + # Unfreeze the event content if necessary, so that we may modify it below + edit_content = unfreeze(edit_content) + serialized_event["content"] = edit_content.get("m.new_content", {}) + + # Check for existing relations + relates_to = event.content.get("m.relates_to") + if relates_to: + # Keep the relations, ensuring we use a dict copy of the original + serialized_event["content"]["m.relates_to"] = relates_to.copy() + else: + serialized_event["content"].pop("m.relates_to", None) + + relations[RelationTypes.REPLACE] = { + "event_id": edit.event_id, + "origin_server_ts": edit.origin_server_ts, + "sender": edit.sender, + } + + # If this event is the start of a thread, include a summary of the replies. + if self._msc3440_enabled: + ( + thread_count, + latest_thread_event, + ) = await self.store.get_thread_summary(event_id) + if latest_thread_event: + relations[RelationTypes.THREAD] = { + # Don't bundle relations as this could recurse forever. + "latest_event": await self.serialize_event( + latest_thread_event, time_now, bundle_relations=False + ), + "count": thread_count, + } + + # If any bundled relations were found, include them. + if relations: + serialized_event["unsigned"].setdefault("m.relations", {}).update(relations) + async def serialize_events( self, events: Iterable[Union[JsonDict, EventBase]], time_now: int, **kwargs: Any ) -> List[JsonDict]: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 3b85b135e0..bc3f96c1fc 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -1395,11 +1395,28 @@ class FederationClient(FederationBase): async def send_request( destination: str, ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]: - res = await self.transport_layer.get_room_hierarchy( - destination=destination, - room_id=room_id, - suggested_only=suggested_only, - ) + try: + res = await self.transport_layer.get_room_hierarchy( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + ) + except HttpResponseException as e: + # If an error is received that is due to an unrecognised endpoint, + # fallback to the unstable endpoint. Otherwise consider it a + # legitmate error and raise. + if not self._is_unknown_endpoint(e): + raise + + logger.debug( + "Couldn't fetch room hierarchy with the v1 API, falling back to the unstable API" + ) + + res = await self.transport_layer.get_room_hierarchy_unstable( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + ) room = res.get("room") if not isinstance(room, dict): @@ -1449,6 +1466,10 @@ class FederationClient(FederationBase): if e.code != 502: raise + logger.debug( + "Couldn't fetch room hierarchy, falling back to the spaces API" + ) + # Fallback to the old federation API and translate the results if # no servers implement the new API. # diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9a8758e9a6..8fbc75aa65 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -613,8 +613,11 @@ class FederationServer(FederationBase): state = await self.store.get_events(state_ids) time_now = self._clock.time_msec() + event_json = event.get_pdu_json() return { - "org.matrix.msc3083.v2.event": event.get_pdu_json(), + # TODO Remove the unstable prefix when servers have updated. 
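[Editor's note, not part of the diff: after the refactored `_injected_bundled_relations` runs, any bundled relations are merged into `unsigned["m.relations"]` of the serialized event, keyed by relation type. A hand-written illustration of that structure for an annotated, edited event; the values are made up, and the annotation chunk shape follows MSC2675:]

```python
# Hand-written illustration of the dictionary assembled above; values are placeholders.
serialized_event_unsigned = {
    "m.relations": {
        "m.annotation": {
            "chunk": [{"type": "m.reaction", "key": "👍", "count": 3}],
        },
        "m.replace": {
            "event_id": "$edit_event_id",
            "origin_server_ts": 1636000000000,
            "sender": "@alice:example.org",
        },
    }
}
```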
+ "org.matrix.msc3083.v2.event": event_json, + "event": event_json, "state": [p.get_pdu_json(time_now) for p in state.values()], "auth_chain": [p.get_pdu_json(time_now) for p in auth_chain], } diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 10b5aa5af8..fe29bcfd4b 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -1192,10 +1192,24 @@ class TransportLayerClient: ) async def get_room_hierarchy( - self, - destination: str, - room_id: str, - suggested_only: bool, + self, destination: str, room_id: str, suggested_only: bool + ) -> JsonDict: + """ + Args: + destination: The remote server + room_id: The room ID to ask about. + suggested_only: if True, only suggested rooms will be returned + """ + path = _create_v1_path("/hierarchy/%s", room_id) + + return await self.client.get_json( + destination=destination, + path=path, + args={"suggested_only": "true" if suggested_only else "false"}, + ) + + async def get_room_hierarchy_unstable( + self, destination: str, room_id: str, suggested_only: bool ) -> JsonDict: """ Args: @@ -1317,15 +1331,26 @@ class SendJoinParser(ByteParser[SendJoinResponse]): prefix + "auth_chain.item", use_float=True, ) - self._coro_event = ijson.kvitems_coro( + # TODO Remove the unstable prefix when servers have updated. + # + # By re-using the same event dictionary this will cause the parsing of + # org.matrix.msc3083.v2.event and event to stomp over each other. + # Generally this should be fine. + self._coro_unstable_event = ijson.kvitems_coro( _event_parser(self._response.event_dict), prefix + "org.matrix.msc3083.v2.event", use_float=True, ) + self._coro_event = ijson.kvitems_coro( + _event_parser(self._response.event_dict), + prefix + "event", + use_float=True, + ) def write(self, data: bytes) -> int: self._coro_state.send(data) self._coro_auth.send(data) + self._coro_unstable_event.send(data) self._coro_event.send(data) return len(data) diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 2fdf6cc99e..66e915228c 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -611,7 +611,6 @@ class FederationSpaceSummaryServlet(BaseFederationServlet): class FederationRoomHierarchyServlet(BaseFederationServlet): - PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" PATH = "/hierarchy/(?P<room_id>[^/]*)" def __init__( @@ -637,6 +636,10 @@ class FederationRoomHierarchyServlet(BaseFederationServlet): ) +class FederationRoomHierarchyUnstableServlet(FederationRoomHierarchyServlet): + PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" + + class RoomComplexityServlet(BaseFederationServlet): """ Indicates to other servers how complex (and therefore likely @@ -701,6 +704,7 @@ FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] 
= ( RoomComplexityServlet, FederationSpaceSummaryServlet, FederationRoomHierarchyServlet, + FederationRoomHierarchyUnstableServlet, FederationV1SendKnockServlet, FederationMakeKnockServlet, ) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 53f99031b1..a87896e538 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -40,6 +40,8 @@ from typing import TYPE_CHECKING, Optional, Tuple from signedjson.sign import sign_json +from twisted.internet.defer import Deferred + from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import JsonDict, get_domain_from_id @@ -166,7 +168,7 @@ class GroupAttestionRenewer: return {} - def _start_renew_attestations(self) -> None: + def _start_renew_attestations(self) -> "Deferred[None]": return run_as_background_process("renew_attestations", self._renew_attestations) async def _renew_attestations(self) -> None: diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index b62e13b725..4d9c4e5834 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -18,6 +18,7 @@ import time import unicodedata import urllib.parse from binascii import crc32 +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -756,53 +757,109 @@ class AuthHandler: async def refresh_token( self, refresh_token: str, - valid_until_ms: Optional[int], - ) -> Tuple[str, str]: + access_token_valid_until_ms: Optional[int], + refresh_token_valid_until_ms: Optional[int], + ) -> Tuple[str, str, Optional[int]]: """ Consumes a refresh token and generate both a new access token and a new refresh token from it. The consumed refresh token is considered invalid after the first use of the new access token or the new refresh token. + The lifetime of both the access token and refresh token will be capped so that they + do not exceed the session's ultimate expiry time, if applicable. + Args: refresh_token: The token to consume. - valid_until_ms: The expiration timestamp of the new access token. - + access_token_valid_until_ms: The expiration timestamp of the new access token. + None if the access token does not expire. + refresh_token_valid_until_ms: The expiration timestamp of the new refresh token. + None if the refresh token does not expire. Returns: - A tuple containing the new access token and refresh token + A tuple containing: + - the new access token + - the new refresh token + - the actual expiry time of the access token, which may be earlier than + `access_token_valid_until_ms`. 
""" # Verify the token signature first before looking up the token if not self._verify_refresh_token(refresh_token): - raise SynapseError(401, "invalid refresh token", Codes.UNKNOWN_TOKEN) + raise SynapseError( + HTTPStatus.UNAUTHORIZED, "invalid refresh token", Codes.UNKNOWN_TOKEN + ) existing_token = await self.store.lookup_refresh_token(refresh_token) if existing_token is None: - raise SynapseError(401, "refresh token does not exist", Codes.UNKNOWN_TOKEN) + raise SynapseError( + HTTPStatus.UNAUTHORIZED, + "refresh token does not exist", + Codes.UNKNOWN_TOKEN, + ) if ( existing_token.has_next_access_token_been_used or existing_token.has_next_refresh_token_been_refreshed ): raise SynapseError( - 403, "refresh token isn't valid anymore", Codes.FORBIDDEN + HTTPStatus.FORBIDDEN, + "refresh token isn't valid anymore", + Codes.FORBIDDEN, + ) + + now_ms = self._clock.time_msec() + + if existing_token.expiry_ts is not None and existing_token.expiry_ts < now_ms: + + raise SynapseError( + HTTPStatus.FORBIDDEN, + "The supplied refresh token has expired", + Codes.FORBIDDEN, ) + if existing_token.ultimate_session_expiry_ts is not None: + # This session has a bounded lifetime, even across refreshes. + + if access_token_valid_until_ms is not None: + access_token_valid_until_ms = min( + access_token_valid_until_ms, + existing_token.ultimate_session_expiry_ts, + ) + else: + access_token_valid_until_ms = existing_token.ultimate_session_expiry_ts + + if refresh_token_valid_until_ms is not None: + refresh_token_valid_until_ms = min( + refresh_token_valid_until_ms, + existing_token.ultimate_session_expiry_ts, + ) + else: + refresh_token_valid_until_ms = existing_token.ultimate_session_expiry_ts + if existing_token.ultimate_session_expiry_ts < now_ms: + raise SynapseError( + HTTPStatus.FORBIDDEN, + "The session has expired and can no longer be refreshed", + Codes.FORBIDDEN, + ) + ( new_refresh_token, new_refresh_token_id, - ) = await self.get_refresh_token_for_user_id( - user_id=existing_token.user_id, device_id=existing_token.device_id + ) = await self.create_refresh_token_for_user_id( + user_id=existing_token.user_id, + device_id=existing_token.device_id, + expiry_ts=refresh_token_valid_until_ms, + ultimate_session_expiry_ts=existing_token.ultimate_session_expiry_ts, ) - access_token = await self.get_access_token_for_user_id( + access_token = await self.create_access_token_for_user_id( user_id=existing_token.user_id, device_id=existing_token.device_id, - valid_until_ms=valid_until_ms, + valid_until_ms=access_token_valid_until_ms, refresh_token_id=new_refresh_token_id, ) await self.store.replace_refresh_token( existing_token.token_id, new_refresh_token_id ) - return access_token, new_refresh_token + return access_token, new_refresh_token, access_token_valid_until_ms def _verify_refresh_token(self, token: str) -> bool: """ @@ -832,10 +889,12 @@ class AuthHandler: return True - async def get_refresh_token_for_user_id( + async def create_refresh_token_for_user_id( self, user_id: str, device_id: str, + expiry_ts: Optional[int], + ultimate_session_expiry_ts: Optional[int], ) -> Tuple[str, int]: """ Creates a new refresh token for the user with the given user ID. @@ -843,6 +902,13 @@ class AuthHandler: Args: user_id: canonical user ID device_id: the device ID to associate with the token. + expiry_ts (milliseconds since the epoch): Time after which the + refresh token cannot be used. + If None, the refresh token never expires until it has been used. 
+ ultimate_session_expiry_ts (milliseconds since the epoch): + Time at which the session will end and can not be extended any + further. + If None, the session can be refreshed indefinitely. Returns: The newly created refresh token and its ID in the database @@ -852,10 +918,12 @@ class AuthHandler: user_id=user_id, token=refresh_token, device_id=device_id, + expiry_ts=expiry_ts, + ultimate_session_expiry_ts=ultimate_session_expiry_ts, ) return refresh_token, refresh_token_id - async def get_access_token_for_user_id( + async def create_access_token_for_user_id( self, user_id: str, device_id: Optional[str], diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 1f64534a8a..b4ff935546 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -124,7 +124,7 @@ class EventStreamHandler: as_client_event=as_client_event, # We don't bundle "live" events, as otherwise clients # will end up double counting annotations. - bundle_aggregations=False, + bundle_relations=False, ) chunk = { diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 3dbe611f95..c83eaea359 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -464,15 +464,6 @@ class IdentityHandler: if next_link: params["next_link"] = next_link - if self.hs.config.email.using_identity_server_from_trusted_list: - # Warn that a deprecated config option is in use - logger.warning( - 'The config option "trust_identity_server_for_password_resets" ' - 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " - "details and update your config file." - ) - try: data = await self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/email/requestToken", @@ -517,15 +508,6 @@ class IdentityHandler: if next_link: params["next_link"] = next_link - if self.hs.config.email.using_identity_server_from_trusted_list: - # Warn that a deprecated config option is in use - logger.warning( - 'The config option "trust_identity_server_for_password_resets" ' - 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " - "details and update your config file." - ) - try: data = await self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken", diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d4c2a6ab7a..95b4fad3c6 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -252,7 +252,7 @@ class MessageHandler: now, # We don't bother bundling aggregations in when asked for state # events, as clients won't use them. - bundle_aggregations=False, + bundle_relations=False, ) return events @@ -1001,13 +1001,52 @@ class EventCreationHandler: ) self.validator.validate_new(event, self.config) + await self._validate_event_relation(event) + logger.debug("Created event %s", event.event_id) + + return event, context + + async def _validate_event_relation(self, event: EventBase) -> None: + """ + Ensure the relation data on a new event is not bogus. + + Args: + event: The event being created. + + Raises: + SynapseError if the event is invalid. + """ + + relation = event.content.get("m.relates_to") + if not relation: + return + + relation_type = relation.get("rel_type") + if not relation_type: + return + + # Ensure the parent is real. 
+ relates_to = relation.get("event_id") + if not relates_to: + return + + parent_event = await self.store.get_event(relates_to, allow_none=True) + if parent_event: + # And in the same room. + if parent_event.room_id != event.room_id: + raise SynapseError(400, "Relations must be in the same room") + + else: + # There must be some reason that the client knows the event exists, + # see if there are existing relations. If so, assume everything is fine. + if not await self.store.event_is_target_of_relation(relates_to): + # Otherwise, the client can't know about the parent event! + raise SynapseError(400, "Can't send relation to unknown event") # If this event is an annotation then we check that that the sender # can't annotate the same way twice (e.g. stops users from liking an # event multiple times). - relation = event.content.get("m.relates_to", {}) - if relation.get("rel_type") == RelationTypes.ANNOTATION: - relates_to = relation["event_id"] + if relation_type == RelationTypes.ANNOTATION: aggregation_key = relation["key"] already_exists = await self.store.has_user_annotated_event( @@ -1016,9 +1055,12 @@ class EventCreationHandler: if already_exists: raise SynapseError(400, "Can't send same reaction twice") - logger.debug("Created event %s", event.event_id) - - return event, context + # Don't attempt to start a thread if the parent event is a relation. + elif relation_type == RelationTypes.THREAD: + if await self.store.event_includes_relation(relates_to): + raise SynapseError( + 400, "Cannot start threads from an event with a relation" + ) @measure_func("handle_new_client_event") async def handle_new_client_event( diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index a0e6a01775..24ca11b924 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -116,7 +116,10 @@ class RegistrationHandler: self.pusher_pool = hs.get_pusherpool() self.session_lifetime = hs.config.registration.session_lifetime - self.access_token_lifetime = hs.config.registration.access_token_lifetime + self.refreshable_access_token_lifetime = ( + hs.config.registration.refreshable_access_token_lifetime + ) + self.refresh_token_lifetime = hs.config.registration.refresh_token_lifetime init_counters_for_auth_provider("") @@ -791,13 +794,13 @@ class RegistrationHandler: class and RegisterDeviceReplicationServlet. """ assert not self.hs.config.worker.worker_app - valid_until_ms = None + access_token_expiry = None if self.session_lifetime is not None: if is_guest: raise Exception( "session_lifetime is not currently implemented for guest access" ) - valid_until_ms = self.clock.time_msec() + self.session_lifetime + access_token_expiry = self.clock.time_msec() + self.session_lifetime refresh_token = None refresh_token_id = None @@ -806,23 +809,57 @@ class RegistrationHandler: user_id, device_id, initial_display_name ) if is_guest: - assert valid_until_ms is None + assert access_token_expiry is None access_token = self.macaroon_gen.generate_guest_access_token(user_id) else: if should_issue_refresh_token: + # A refreshable access token lifetime must be configured + # since we're told to issue a refresh token (the caller checks + # that this value is set before setting this flag). 
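# --- Illustrative aside (not part of the diff above) -----------------------
# The registration hunk that continues below derives three timestamps and
# clamps the token lifetimes so that neither token outlives the overall
# session. A compact standalone sketch of that arithmetic (all values are
# milliseconds since the epoch; the three lifetime arguments stand in for the
# config values the diff reads from hs.config.registration):
from typing import Optional, Tuple


def compute_token_expiries(
    now_ms: int,
    refreshable_access_token_lifetime: int,
    refresh_token_lifetime: Optional[int],
    session_lifetime: Optional[int],
) -> Tuple[int, Optional[int], Optional[int]]:
    """Returns (access_token_expiry, refresh_token_expiry, ultimate_session_expiry)."""
    access_token_expiry = now_ms + refreshable_access_token_lifetime
    refresh_token_expiry = (
        now_ms + refresh_token_lifetime if refresh_token_lifetime is not None else None
    )
    ultimate_session_expiry = (
        now_ms + session_lifetime if session_lifetime is not None else None
    )
    if ultimate_session_expiry is not None:
        # Tokens must not outlive the session, even under an odd configuration
        # where the session lifetime is shorter than a token lifetime.
        access_token_expiry = min(access_token_expiry, ultimate_session_expiry)
        if refresh_token_expiry is not None:
            refresh_token_expiry = min(refresh_token_expiry, ultimate_session_expiry)
    return access_token_expiry, refresh_token_expiry, ultimate_session_expiry


# e.g. compute_token_expiries(0, 5 * 60_000, 24 * 3_600_000, 3_600_000)
# -> (300000, 3600000, 3600000): the 24h refresh token is clamped to the
#    1h session, while the 5 min access token is already within it.
# ---------------------------------------------------------------------------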
+ assert self.refreshable_access_token_lifetime is not None + + now_ms = self.clock.time_msec() + + # Set the expiry time of the refreshable access token + access_token_expiry = now_ms + self.refreshable_access_token_lifetime + + # Set the refresh token expiry time (if configured) + refresh_token_expiry = None + if self.refresh_token_lifetime is not None: + refresh_token_expiry = now_ms + self.refresh_token_lifetime + + # Set an ultimate session expiry time (if configured) + ultimate_session_expiry_ts = None + if self.session_lifetime is not None: + ultimate_session_expiry_ts = now_ms + self.session_lifetime + + # Also ensure that the issued tokens don't outlive the + # session. + # (It would be weird to configure a homeserver with a shorter + # session lifetime than token lifetime, but may as well handle + # it.) + access_token_expiry = min( + access_token_expiry, ultimate_session_expiry_ts + ) + if refresh_token_expiry is not None: + refresh_token_expiry = min( + refresh_token_expiry, ultimate_session_expiry_ts + ) + ( refresh_token, refresh_token_id, - ) = await self._auth_handler.get_refresh_token_for_user_id( + ) = await self._auth_handler.create_refresh_token_for_user_id( user_id, device_id=registered_device_id, + expiry_ts=refresh_token_expiry, + ultimate_session_expiry_ts=ultimate_session_expiry_ts, ) - valid_until_ms = self.clock.time_msec() + self.access_token_lifetime - access_token = await self._auth_handler.get_access_token_for_user_id( + access_token = await self._auth_handler.create_access_token_for_user_id( user_id, device_id=registered_device_id, - valid_until_ms=valid_until_ms, + valid_until_ms=access_token_expiry, is_appservice_ghost=is_appservice_ghost, refresh_token_id=refresh_token_id, ) @@ -830,7 +867,7 @@ class RegistrationHandler: return { "device_id": registered_device_id, "access_token": access_token, - "valid_until_ms": valid_until_ms, + "valid_until_ms": access_token_expiry, "refresh_token": refresh_token, } diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index f9a099c4f3..88053f9869 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -775,8 +775,11 @@ class RoomCreationHandler: raise SynapseError(403, "Room visibility value not allowed.") if is_public: + room_aliases = [] + if room_alias: + room_aliases.append(room_alias.to_string()) if not self.config.roomdirectory.is_publishing_room_allowed( - user_id, room_id, room_alias + user_id, room_id, room_aliases ): # Let's just return a generic message, as there may be all sorts of # reasons why we said no. TODO: Allow configurable error messages diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 0723286383..f880aa93d2 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -221,6 +221,7 @@ class RoomBatchHandler: action=membership, content=event_dict["content"], outlier=True, + historical=True, prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same @@ -240,6 +241,7 @@ class RoomBatchHandler: ), event_dict, outlier=True, + historical=True, prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. 
Otherwise it will be the same diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 08244b690d..a6dbff637f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -268,6 +268,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, + historical: bool = False, ) -> Tuple[str, int]: """ Internal membership update function to get an existing event or create @@ -293,6 +294,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. Returns: Tuple of event ID and stream ordering position @@ -337,6 +341,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): auth_event_ids=auth_event_ids, require_consent=require_consent, outlier=outlier, + historical=historical, ) prev_state_ids = await context.get_prev_state_ids() @@ -433,6 +438,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): new_room: bool = False, require_consent: bool = True, outlier: bool = False, + historical: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: @@ -454,6 +460,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. @@ -487,6 +496,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): new_room=new_room, require_consent=require_consent, outlier=outlier, + historical=historical, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, ) @@ -507,6 +517,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): new_room: bool = False, require_consent: bool = True, outlier: bool = False, + historical: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: @@ -530,6 +541,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. 
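The hunks above and below thread a new `historical: bool = False` keyword through the layered membership API so that MSC2716 batch insertion can mark the resulting events as backfilled. A minimal sketch of that plumbing pattern, with hypothetical function names; defaulting the flag at every layer keeps existing call sites working unchanged:

def create_event(*, outlier: bool = False, historical: bool = False) -> dict:
    # Innermost layer: record the flags on the event being built so later
    # persistence logic can treat it as a backfilled outlier.
    return {"outlier": outlier, "historical": historical}


def update_membership_locked(*, outlier: bool = False, historical: bool = False) -> dict:
    return create_event(outlier=outlier, historical=historical)


def update_membership(*, outlier: bool = False, historical: bool = False) -> dict:
    # Public entry point: forwards the flag unchanged, so callers that never
    # pass it keep their old behaviour.
    return update_membership_locked(outlier=outlier, historical=historical)


assert update_membership() == {"outlier": False, "historical": False}
assert update_membership(outlier=True, historical=True)["historical"] is True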
@@ -657,6 +671,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content=content, require_consent=require_consent, outlier=outlier, + historical=historical, ) latest_event_ids = await self.store.get_prev_events_for_room(room_id) diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index d9764a7797..c06939e3ca 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -36,8 +36,9 @@ from synapse.api.errors import ( SynapseError, UnsupportedRoomVersionError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.events import EventBase -from synapse.types import JsonDict +from synapse.types import JsonDict, Requester from synapse.util.caches.response_cache import ResponseCache if TYPE_CHECKING: @@ -93,11 +94,14 @@ class RoomSummaryHandler: self._event_serializer = hs.get_event_client_serializer() self._server_name = hs.hostname self._federation_client = hs.get_federation_client() + self._ratelimiter = Ratelimiter( + store=self._store, clock=hs.get_clock(), rate_hz=5, burst_count=10 + ) # If a user tries to fetch the same page multiple times in quick succession, # only process the first attempt and return its result to subsequent requests. self._pagination_response_cache: ResponseCache[ - Tuple[str, bool, Optional[int], Optional[int], Optional[str]] + Tuple[str, str, bool, Optional[int], Optional[int], Optional[str]] ] = ResponseCache( hs.get_clock(), "get_room_hierarchy", @@ -249,7 +253,7 @@ class RoomSummaryHandler: async def get_room_hierarchy( self, - requester: str, + requester: Requester, requested_room_id: str, suggested_only: bool = False, max_depth: Optional[int] = None, @@ -276,15 +280,24 @@ class RoomSummaryHandler: Returns: The JSON hierarchy dictionary. """ + await self._ratelimiter.ratelimit(requester) + # If a user tries to fetch the same page multiple times in quick succession, # only process the first attempt and return its result to subsequent requests. # # This is due to the pagination process mutating internal state, attempting # to process multiple requests for the same page will result in errors. 
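# --- Illustrative aside (not part of the diff above) -----------------------
# The call below now keys the pagination cache on the requesting user as well
# as the room and pagination parameters, and the endpoint is rate limited per
# requester. A small asyncio stand-in for the "share one in-flight result per
# key" behaviour (Synapse's ResponseCache is Twisted-based; only the keying
# idea is mirrored here, and TinyResponseCache is an invented name):
import asyncio
from typing import Awaitable, Callable, Dict, Hashable


class TinyResponseCache:
    def __init__(self) -> None:
        self._in_flight: Dict[Hashable, asyncio.Future] = {}

    async def wrap(self, key: Hashable, func: Callable[[], Awaitable[dict]]) -> dict:
        fut = self._in_flight.get(key)
        if fut is None:
            # First request for this key: start the work and remember it so
            # concurrent identical requests await the same future.
            fut = asyncio.ensure_future(func())
            self._in_flight[key] = fut
            fut.add_done_callback(lambda _: self._in_flight.pop(key, None))
        return await fut


# With the user in the key, two different users requesting the same page no
# longer share one (potentially visibility-dependent) cached response:
#   await cache.wrap((user_id, room_id, suggested_only, max_depth, limit, token), fetch)
# ---------------------------------------------------------------------------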
return await self._pagination_response_cache.wrap( - (requested_room_id, suggested_only, max_depth, limit, from_token), + ( + requester.user.to_string(), + requested_room_id, + suggested_only, + max_depth, + limit, + from_token, + ), self._get_room_hierarchy, - requester, + requester.user.to_string(), requested_room_id, suggested_only, max_depth, diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 22c6174821..1676ebd057 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -90,7 +90,7 @@ class FollowerTypingHandler: self.wheel_timer = WheelTimer(bucket_size=5000) @wrap_as_background_process("typing._handle_timeouts") - def _handle_timeouts(self) -> None: + async def _handle_timeouts(self) -> None: logger.debug("Checking for typing timeouts") now = self.clock.time_msec() diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 91ee5c8193..ceef57ad88 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -20,10 +20,25 @@ import os import platform import threading import time -from typing import Callable, Dict, Iterable, Mapping, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Generic, + Iterable, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Type, + TypeVar, + Union, + cast, +) import attr -from prometheus_client import Counter, Gauge, Histogram +from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram, Metric from prometheus_client.core import ( REGISTRY, CounterMetricFamily, @@ -32,6 +47,7 @@ from prometheus_client.core import ( ) from twisted.internet import reactor +from twisted.internet.base import ReactorBase from twisted.python.threadpool import ThreadPool import synapse @@ -54,7 +70,7 @@ HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") class RegistryProxy: @staticmethod - def collect(): + def collect() -> Iterable[Metric]: for metric in REGISTRY.collect(): if not metric.name.startswith("__"): yield metric @@ -74,7 +90,7 @@ class LaterGauge: ] ) - def collect(self): + def collect(self) -> Iterable[Metric]: g = GaugeMetricFamily(self.name, self.desc, labels=self.labels) @@ -93,10 +109,10 @@ class LaterGauge: yield g - def __attrs_post_init__(self): + def __attrs_post_init__(self) -> None: self._register() - def _register(self): + def _register(self) -> None: if self.name in all_gauges.keys(): logger.warning("%s already registered, reregistering" % (self.name,)) REGISTRY.unregister(all_gauges.pop(self.name)) @@ -105,7 +121,12 @@ class LaterGauge: all_gauges[self.name] = self -class InFlightGauge: +# `MetricsEntry` only makes sense when it is a `Protocol`, +# but `Protocol` can't be used as a `TypeVar` bound. +MetricsEntry = TypeVar("MetricsEntry") + + +class InFlightGauge(Generic[MetricsEntry]): """Tracks number of things (e.g. requests, Measure blocks, etc) in flight at any given time. @@ -115,14 +136,19 @@ class InFlightGauge: callbacks. Args: - name (str) - desc (str) - labels (list[str]) - sub_metrics (list[str]): A list of sub metrics that the callbacks - will update. + name + desc + labels + sub_metrics: A list of sub metrics that the callbacks will update. """ - def __init__(self, name, desc, labels, sub_metrics): + def __init__( + self, + name: str, + desc: str, + labels: Sequence[str], + sub_metrics: Sequence[str], + ): self.name = name self.desc = desc self.labels = labels @@ -130,19 +156,25 @@ class InFlightGauge: # Create a class which have the sub_metrics values as attributes, which # default to 0 on initialization. 
Used to pass to registered callbacks. - self._metrics_class = attr.make_class( + self._metrics_class: Type[MetricsEntry] = attr.make_class( "_MetricsEntry", attrs={x: attr.ib(0) for x in sub_metrics}, slots=True ) # Counts number of in flight blocks for a given set of label values - self._registrations: Dict = {} + self._registrations: Dict[ + Tuple[str, ...], Set[Callable[[MetricsEntry], None]] + ] = {} # Protects access to _registrations self._lock = threading.Lock() self._register_with_collector() - def register(self, key, callback): + def register( + self, + key: Tuple[str, ...], + callback: Callable[[MetricsEntry], None], + ) -> None: """Registers that we've entered a new block with labels `key`. `callback` gets called each time the metrics are collected. The same @@ -158,13 +190,17 @@ class InFlightGauge: with self._lock: self._registrations.setdefault(key, set()).add(callback) - def unregister(self, key, callback): + def unregister( + self, + key: Tuple[str, ...], + callback: Callable[[MetricsEntry], None], + ) -> None: """Registers that we've exited a block with labels `key`.""" with self._lock: self._registrations.setdefault(key, set()).discard(callback) - def collect(self): + def collect(self) -> Iterable[Metric]: """Called by prometheus client when it reads metrics. Note: may be called by a separate thread. @@ -200,7 +236,7 @@ class InFlightGauge: gauge.add_metric(key, getattr(metrics, name)) yield gauge - def _register_with_collector(self): + def _register_with_collector(self) -> None: if self.name in all_gauges.keys(): logger.warning("%s already registered, reregistering" % (self.name,)) REGISTRY.unregister(all_gauges.pop(self.name)) @@ -230,7 +266,7 @@ class GaugeBucketCollector: name: str, documentation: str, buckets: Iterable[float], - registry=REGISTRY, + registry: CollectorRegistry = REGISTRY, ): """ Args: @@ -257,12 +293,12 @@ class GaugeBucketCollector: registry.register(self) - def collect(self): + def collect(self) -> Iterable[Metric]: # Don't report metrics unless we've already collected some data if self._metric is not None: yield self._metric - def update_data(self, values: Iterable[float]): + def update_data(self, values: Iterable[float]) -> None: """Update the data to be reported by the metric The existing data is cleared, and each measurement in the input is assigned @@ -304,7 +340,7 @@ class GaugeBucketCollector: class CPUMetrics: - def __init__(self): + def __init__(self) -> None: ticks_per_sec = 100 try: # Try and get the system config @@ -314,7 +350,7 @@ class CPUMetrics: self.ticks_per_sec = ticks_per_sec - def collect(self): + def collect(self) -> Iterable[Metric]: if not HAVE_PROC_SELF_STAT: return @@ -364,7 +400,7 @@ gc_time = Histogram( class GCCounts: - def collect(self): + def collect(self) -> Iterable[Metric]: cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) for n, m in enumerate(gc.get_count()): cm.add_metric([str(n)], m) @@ -382,7 +418,7 @@ if not running_on_pypy: class PyPyGCStats: - def collect(self): + def collect(self) -> Iterable[Metric]: # @stats is a pretty-printer object with __str__() returning a nice table, # plus some fields that contain data from that table. 
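The InFlightGauge changes above are mostly typing: the class becomes Generic[MetricsEntry] so the callbacks registered against it are checked against the per-block metrics object that attr.make_class builds at runtime. A stripped-down, prometheus-free sketch of that shape (TinyInFlightGauge, make_entry and snapshot are illustrative names, not the real synapse.metrics API):

import threading
from typing import Callable, Dict, Generic, Set, Tuple, TypeVar

MetricsEntry = TypeVar("MetricsEntry")


class TinyInFlightGauge(Generic[MetricsEntry]):
    def __init__(self, make_entry: Callable[[], MetricsEntry]) -> None:
        self._make_entry = make_entry
        self._registrations: Dict[
            Tuple[str, ...], Set[Callable[[MetricsEntry], None]]
        ] = {}
        self._lock = threading.Lock()

    def register(self, key: Tuple[str, ...], cb: Callable[[MetricsEntry], None]) -> None:
        with self._lock:
            self._registrations.setdefault(key, set()).add(cb)

    def unregister(self, key: Tuple[str, ...], cb: Callable[[MetricsEntry], None]) -> None:
        with self._lock:
            self._registrations.setdefault(key, set()).discard(cb)

    def snapshot(self) -> Dict[Tuple[str, ...], MetricsEntry]:
        # Mirror of collect(): ask each registered callback to fill in a fresh
        # entry for its label tuple, outside the lock.
        with self._lock:
            registrations = {k: set(v) for k, v in self._registrations.items()}
        result: Dict[Tuple[str, ...], MetricsEntry] = {}
        for key, callbacks in registrations.items():
            entry = self._make_entry()
            for cb in callbacks:
                cb(entry)
            result[key] = entry
        return result


# e.g. gauge = TinyInFlightGauge(make_entry=dict): callbacks mutate the dict
# they are handed, and snapshot() yields one aggregated entry per label tuple.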
@@ -565,7 +601,7 @@ def register_threadpool(name: str, threadpool: ThreadPool) -> None: class ReactorLastSeenMetric: - def collect(self): + def collect(self) -> Iterable[Metric]: cm = GaugeMetricFamily( "python_twisted_reactor_last_seen", "Seconds since the Twisted reactor was last seen", @@ -584,9 +620,12 @@ MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0) _last_gc = [0.0, 0.0, 0.0] -def runUntilCurrentTimer(reactor, func): +F = TypeVar("F", bound=Callable[..., Any]) + + +def runUntilCurrentTimer(reactor: ReactorBase, func: F) -> F: @functools.wraps(func) - def f(*args, **kwargs): + def f(*args: Any, **kwargs: Any) -> Any: now = reactor.seconds() num_pending = 0 @@ -649,7 +688,7 @@ def runUntilCurrentTimer(reactor, func): return ret - return f + return cast(F, f) try: @@ -677,5 +716,5 @@ __all__ = [ "start_http_server", "LaterGauge", "InFlightGauge", - "BucketCollector", + "GaugeBucketCollector", ] diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index bb9bcb5592..353d0a63b6 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -25,27 +25,25 @@ import math import threading from http.server import BaseHTTPRequestHandler, HTTPServer from socketserver import ThreadingMixIn -from typing import Dict, List +from typing import Any, Dict, List, Type, Union from urllib.parse import parse_qs, urlparse -from prometheus_client import REGISTRY +from prometheus_client import REGISTRY, CollectorRegistry +from prometheus_client.core import Sample from twisted.web.resource import Resource +from twisted.web.server import Request from synapse.util import caches CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" -INF = float("inf") -MINUS_INF = float("-inf") - - -def floatToGoString(d): +def floatToGoString(d: Union[int, float]) -> str: d = float(d) - if d == INF: + if d == math.inf: return "+Inf" - elif d == MINUS_INF: + elif d == -math.inf: return "-Inf" elif math.isnan(d): return "NaN" @@ -60,7 +58,7 @@ def floatToGoString(d): return s -def sample_line(line, name): +def sample_line(line: Sample, name: str) -> str: if line.labels: labelstr = "{{{0}}}".format( ",".join( @@ -82,7 +80,7 @@ def sample_line(line, name): return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) -def generate_latest(registry, emit_help=False): +def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: # Trigger the cache metrics to be rescraped, which updates the common # metrics but do not produce metrics themselves @@ -187,7 +185,7 @@ class MetricsHandler(BaseHTTPRequestHandler): registry = REGISTRY - def do_GET(self): + def do_GET(self) -> None: registry = self.registry params = parse_qs(urlparse(self.path).query) @@ -207,11 +205,11 @@ class MetricsHandler(BaseHTTPRequestHandler): self.end_headers() self.wfile.write(output) - def log_message(self, format, *args): + def log_message(self, format: str, *args: Any) -> None: """Log nothing.""" @classmethod - def factory(cls, registry): + def factory(cls, registry: CollectorRegistry) -> Type: """Returns a dynamic MetricsHandler class tied to the passed registry. 
""" @@ -236,7 +234,9 @@ class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): daemon_threads = True -def start_http_server(port, addr="", registry=REGISTRY): +def start_http_server( + port: int, addr: str = "", registry: CollectorRegistry = REGISTRY +) -> None: """Starts an HTTP server for prometheus metrics as a daemon thread""" CustomMetricsHandler = MetricsHandler.factory(registry) httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) @@ -252,10 +252,10 @@ class MetricsResource(Resource): isLeaf = True - def __init__(self, registry=REGISTRY): + def __init__(self, registry: CollectorRegistry = REGISTRY): self.registry = registry - def render_GET(self, request): + def render_GET(self, request: Request) -> bytes: request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) response = generate_latest(self.registry) request.setHeader(b"Content-Length", str(len(response))) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 2ab599a334..53c508af91 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -15,19 +15,37 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set, Union +from types import TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + Iterable, + Optional, + Set, + Type, + TypeVar, + Union, + cast, +) +from prometheus_client import Metric from prometheus_client.core import REGISTRY, Counter, Gauge from twisted.internet import defer -from synapse.logging.context import LoggingContext, PreserveLoggingContext +from synapse.logging.context import ( + ContextResourceUsage, + LoggingContext, + PreserveLoggingContext, +) from synapse.logging.opentracing import ( SynapseTags, noop_context_manager, start_active_span, ) -from synapse.util.async_helpers import maybe_awaitable if TYPE_CHECKING: import resource @@ -116,7 +134,7 @@ class _Collector: before they are returned. """ - def collect(self): + def collect(self) -> Iterable[Metric]: global _background_processes_active_since_last_scrape # We swap out the _background_processes set with an empty one so that @@ -144,12 +162,12 @@ REGISTRY.register(_Collector()) class _BackgroundProcess: - def __init__(self, desc, ctx): + def __init__(self, desc: str, ctx: LoggingContext): self.desc = desc self._context = ctx - self._reported_stats = None + self._reported_stats: Optional[ContextResourceUsage] = None - def update_metrics(self): + def update_metrics(self) -> None: """Updates the metrics with values from this process.""" new_stats = self._context.get_resource_usage() if self._reported_stats is None: @@ -169,7 +187,16 @@ class _BackgroundProcess: ) -def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwargs): +R = TypeVar("R") + + +def run_as_background_process( + desc: str, + func: Callable[..., Awaitable[Optional[R]]], + *args: Any, + bg_start_span: bool = True, + **kwargs: Any, +) -> "defer.Deferred[Optional[R]]": """Run the given function in its own logcontext, with resource metrics This should be used to wrap processes which are fired off to run in the @@ -189,11 +216,13 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar args: positional args for func kwargs: keyword args for func - Returns: Deferred which returns the result of func, but note that it does not - follow the synapse logcontext rules. 
+ Returns: + Deferred which returns the result of func, or `None` if func raises. + Note that the returned Deferred does not follow the synapse logcontext + rules. """ - async def run(): + async def run() -> Optional[R]: with _bg_metrics_lock: count = _background_process_counts.get(desc, 0) _background_process_counts[desc] = count + 1 @@ -210,12 +239,13 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar else: ctx = noop_context_manager() with ctx: - return await maybe_awaitable(func(*args, **kwargs)) + return await func(*args, **kwargs) except Exception: logger.exception( "Background process '%s' threw an exception", desc, ) + return None finally: _background_process_in_flight_count.labels(desc).dec() @@ -225,19 +255,24 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar return defer.ensureDeferred(run()) -def wrap_as_background_process(desc): +F = TypeVar("F", bound=Callable[..., Awaitable[Optional[Any]]]) + + +def wrap_as_background_process(desc: str) -> Callable[[F], F]: """Decorator that wraps a function that gets called as a background process. - Equivalent of calling the function with `run_as_background_process` + Equivalent to calling the function with `run_as_background_process` """ - def wrap_as_background_process_inner(func): + def wrap_as_background_process_inner(func: F) -> F: @wraps(func) - def wrap_as_background_process_inner_2(*args, **kwargs): + def wrap_as_background_process_inner_2( + *args: Any, **kwargs: Any + ) -> "defer.Deferred[Optional[R]]": return run_as_background_process(desc, func, *args, **kwargs) - return wrap_as_background_process_inner_2 + return cast(F, wrap_as_background_process_inner_2) return wrap_as_background_process_inner @@ -265,7 +300,7 @@ class BackgroundProcessLoggingContext(LoggingContext): super().__init__("%s-%s" % (name, instance_id)) self._proc = _BackgroundProcess(name, self) - def start(self, rusage: "Optional[resource.struct_rusage]"): + def start(self, rusage: "Optional[resource.struct_rusage]") -> None: """Log context has started running (again).""" super().start(rusage) @@ -276,7 +311,12 @@ class BackgroundProcessLoggingContext(LoggingContext): with _bg_metrics_lock: _background_processes_active_since_last_scrape.add(self._proc) - def __exit__(self, type, value, traceback) -> None: + def __exit__( + self, + type: Optional[Type[BaseException]], + value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: """Log context has finished.""" super().__exit__(type, value, traceback) diff --git a/synapse/metrics/jemalloc.py b/synapse/metrics/jemalloc.py index 29ab6c0229..98ed9c0829 100644 --- a/synapse/metrics/jemalloc.py +++ b/synapse/metrics/jemalloc.py @@ -16,14 +16,16 @@ import ctypes import logging import os import re -from typing import Optional +from typing import Iterable, Optional + +from prometheus_client import Metric from synapse.metrics import REGISTRY, GaugeMetricFamily logger = logging.getLogger(__name__) -def _setup_jemalloc_stats(): +def _setup_jemalloc_stats() -> None: """Checks to see if jemalloc is loaded, and hooks up a collector to record statistics exposed by jemalloc. 
""" @@ -135,7 +137,7 @@ def _setup_jemalloc_stats(): class JemallocCollector: """Metrics for internal jemalloc stats.""" - def collect(self): + def collect(self) -> Iterable[Metric]: _jemalloc_refresh_stats() g = GaugeMetricFamily( @@ -185,7 +187,7 @@ def _setup_jemalloc_stats(): logger.debug("Added jemalloc stats") -def setup_jemalloc_stats(): +def setup_jemalloc_stats() -> None: """Try to setup jemalloc stats, if jemalloc is loaded.""" try: diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index ff79bc3c11..a8154168be 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -24,6 +24,7 @@ from typing import ( List, Optional, Tuple, + TypeVar, Union, ) @@ -35,7 +36,44 @@ from twisted.web.resource import Resource from synapse.api.errors import SynapseError from synapse.events import EventBase -from synapse.events.presence_router import PresenceRouter +from synapse.events.presence_router import ( + GET_INTERESTED_USERS_CALLBACK, + GET_USERS_FOR_STATES_CALLBACK, + PresenceRouter, +) +from synapse.events.spamcheck import ( + CHECK_EVENT_FOR_SPAM_CALLBACK, + CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK, + CHECK_REGISTRATION_FOR_SPAM_CALLBACK, + CHECK_USERNAME_FOR_SPAM_CALLBACK, + USER_MAY_CREATE_ROOM_ALIAS_CALLBACK, + USER_MAY_CREATE_ROOM_CALLBACK, + USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK, + USER_MAY_INVITE_CALLBACK, + USER_MAY_JOIN_ROOM_CALLBACK, + USER_MAY_PUBLISH_ROOM_CALLBACK, + USER_MAY_SEND_3PID_INVITE_CALLBACK, +) +from synapse.events.third_party_rules import ( + CHECK_EVENT_ALLOWED_CALLBACK, + CHECK_THREEPID_CAN_BE_INVITED_CALLBACK, + CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK, + ON_CREATE_ROOM_CALLBACK, + ON_NEW_EVENT_CALLBACK, +) +from synapse.handlers.account_validity import ( + IS_USER_EXPIRED_CALLBACK, + ON_LEGACY_ADMIN_REQUEST, + ON_LEGACY_RENEW_CALLBACK, + ON_LEGACY_SEND_MAIL_CALLBACK, + ON_USER_REGISTRATION_CALLBACK, +) +from synapse.handlers.auth import ( + CHECK_3PID_AUTH_CALLBACK, + CHECK_AUTH_CALLBACK, + ON_LOGGED_OUT_CALLBACK, + AuthHandler, +) from synapse.http.client import SimpleHttpClient from synapse.http.server import ( DirectServeHtmlResource, @@ -44,10 +82,19 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.logging.context import ( + defer_to_thread, + make_deferred_yieldable, + run_in_background, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.client.login import LoginResponse from synapse.storage import DataStore +from synapse.storage.background_updates import ( + DEFAULT_BATCH_SIZE_CALLBACK, + MIN_BATCH_SIZE_CALLBACK, + ON_UPDATE_CALLBACK, +) from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter @@ -67,6 +114,9 @@ if TYPE_CHECKING: from synapse.app.generic_worker import GenericWorkerSlavedStore from synapse.server import HomeServer + +T = TypeVar("T") + """ This package defines the 'stable' API which can be used by extension modules which are loaded into Synapse. @@ -114,7 +164,7 @@ class ModuleApi: can register new users etc if necessary. 
""" - def __init__(self, hs: "HomeServer", auth_handler): + def __init__(self, hs: "HomeServer", auth_handler: AuthHandler) -> None: self._hs = hs # TODO: Fix this type hint once the types for the data stores have been ironed @@ -156,47 +206,139 @@ class ModuleApi: ################################################################################# # The following methods should only be called during the module's initialisation. - @property - def register_spam_checker_callbacks(self): + def register_spam_checker_callbacks( + self, + check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None, + user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, + user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None, + user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None, + user_may_create_room_with_invites: Optional[ + USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK + ] = None, + user_may_create_room_alias: Optional[ + USER_MAY_CREATE_ROOM_ALIAS_CALLBACK + ] = None, + user_may_publish_room: Optional[USER_MAY_PUBLISH_ROOM_CALLBACK] = None, + check_username_for_spam: Optional[CHECK_USERNAME_FOR_SPAM_CALLBACK] = None, + check_registration_for_spam: Optional[ + CHECK_REGISTRATION_FOR_SPAM_CALLBACK + ] = None, + check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None, + ) -> None: """Registers callbacks for spam checking capabilities. Added in Synapse v1.37.0. """ - return self._spam_checker.register_callbacks + return self._spam_checker.register_callbacks( + check_event_for_spam=check_event_for_spam, + user_may_join_room=user_may_join_room, + user_may_invite=user_may_invite, + user_may_send_3pid_invite=user_may_send_3pid_invite, + user_may_create_room=user_may_create_room, + user_may_create_room_with_invites=user_may_create_room_with_invites, + user_may_create_room_alias=user_may_create_room_alias, + user_may_publish_room=user_may_publish_room, + check_username_for_spam=check_username_for_spam, + check_registration_for_spam=check_registration_for_spam, + check_media_file_for_spam=check_media_file_for_spam, + ) - @property - def register_account_validity_callbacks(self): + def register_account_validity_callbacks( + self, + is_user_expired: Optional[IS_USER_EXPIRED_CALLBACK] = None, + on_user_registration: Optional[ON_USER_REGISTRATION_CALLBACK] = None, + on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None, + on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None, + on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None, + ) -> None: """Registers callbacks for account validity capabilities. Added in Synapse v1.39.0. 
""" - return self._account_validity_handler.register_account_validity_callbacks + return self._account_validity_handler.register_account_validity_callbacks( + is_user_expired=is_user_expired, + on_user_registration=on_user_registration, + on_legacy_send_mail=on_legacy_send_mail, + on_legacy_renew=on_legacy_renew, + on_legacy_admin_request=on_legacy_admin_request, + ) - @property - def register_third_party_rules_callbacks(self): + def register_third_party_rules_callbacks( + self, + check_event_allowed: Optional[CHECK_EVENT_ALLOWED_CALLBACK] = None, + on_create_room: Optional[ON_CREATE_ROOM_CALLBACK] = None, + check_threepid_can_be_invited: Optional[ + CHECK_THREEPID_CAN_BE_INVITED_CALLBACK + ] = None, + check_visibility_can_be_modified: Optional[ + CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK + ] = None, + on_new_event: Optional[ON_NEW_EVENT_CALLBACK] = None, + ) -> None: """Registers callbacks for third party event rules capabilities. Added in Synapse v1.39.0. """ - return self._third_party_event_rules.register_third_party_rules_callbacks + return self._third_party_event_rules.register_third_party_rules_callbacks( + check_event_allowed=check_event_allowed, + on_create_room=on_create_room, + check_threepid_can_be_invited=check_threepid_can_be_invited, + check_visibility_can_be_modified=check_visibility_can_be_modified, + on_new_event=on_new_event, + ) - @property - def register_presence_router_callbacks(self): + def register_presence_router_callbacks( + self, + get_users_for_states: Optional[GET_USERS_FOR_STATES_CALLBACK] = None, + get_interested_users: Optional[GET_INTERESTED_USERS_CALLBACK] = None, + ) -> None: """Registers callbacks for presence router capabilities. Added in Synapse v1.42.0. """ - return self._presence_router.register_presence_router_callbacks + return self._presence_router.register_presence_router_callbacks( + get_users_for_states=get_users_for_states, + get_interested_users=get_interested_users, + ) - @property - def register_password_auth_provider_callbacks(self): + def register_password_auth_provider_callbacks( + self, + check_3pid_auth: Optional[CHECK_3PID_AUTH_CALLBACK] = None, + on_logged_out: Optional[ON_LOGGED_OUT_CALLBACK] = None, + auth_checkers: Optional[ + Dict[Tuple[str, Tuple[str, ...]], CHECK_AUTH_CALLBACK] + ] = None, + ) -> None: """Registers callbacks for password auth provider capabilities. Added in Synapse v1.46.0. """ - return self._password_auth_provider.register_password_auth_provider_callbacks + return self._password_auth_provider.register_password_auth_provider_callbacks( + check_3pid_auth=check_3pid_auth, + on_logged_out=on_logged_out, + auth_checkers=auth_checkers, + ) + + def register_background_update_controller_callbacks( + self, + on_update: ON_UPDATE_CALLBACK, + default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + min_batch_size: Optional[MIN_BATCH_SIZE_CALLBACK] = None, + ) -> None: + """Registers background update controller callbacks. - def register_web_resource(self, path: str, resource: Resource): + Added in Synapse v1.49.0. + """ + + for db in self._hs.get_datastores().databases: + db.updates.register_update_controller_callbacks( + on_update=on_update, + default_batch_size=default_batch_size, + min_batch_size=min_batch_size, + ) + + def register_web_resource(self, path: str, resource: Resource) -> None: """Registers a web resource to be served at the given path. This function should be called during initialisation of the module. 
@@ -216,7 +358,7 @@ class ModuleApi: # The following methods can be called by the module at any point in time. @property - def http_client(self): + def http_client(self) -> SimpleHttpClient: """Allows making outbound HTTP requests to remote resources. An instance of synapse.http.client.SimpleHttpClient @@ -226,7 +368,7 @@ class ModuleApi: return self._http_client @property - def public_room_list_manager(self): + def public_room_list_manager(self) -> "PublicRoomListManager": """Allows adding to, removing from and checking the status of rooms in the public room list. @@ -309,7 +451,7 @@ class ModuleApi: """ return await self._store.is_server_admin(UserID.from_string(user_id)) - def get_qualified_user_id(self, username): + def get_qualified_user_id(self, username: str) -> str: """Qualify a user id, if necessary Takes a user id provided by the user and adds the @ and :domain to @@ -318,10 +460,10 @@ class ModuleApi: Added in Synapse v0.25.0. Args: - username (str): provided user id + username: provided user id Returns: - str: qualified @user:id + qualified @user:id """ if username.startswith("@"): return username @@ -357,22 +499,27 @@ class ModuleApi: """ return await self._store.user_get_threepids(user_id) - def check_user_exists(self, user_id): + def check_user_exists(self, user_id: str) -> "defer.Deferred[Optional[str]]": """Check if user exists. Added in Synapse v0.25.0. Args: - user_id (str): Complete @user:id + user_id: Complete @user:id Returns: - Deferred[str|None]: Canonical (case-corrected) user_id, or None + Canonical (case-corrected) user_id, or None if the user is not registered. """ return defer.ensureDeferred(self._auth_handler.check_user_exists(user_id)) @defer.inlineCallbacks - def register(self, localpart, displayname=None, emails: Optional[List[str]] = None): + def register( + self, + localpart: str, + displayname: Optional[str] = None, + emails: Optional[List[str]] = None, + ) -> Generator["defer.Deferred[Any]", Any, Tuple[str, str]]: """Registers a new user with given localpart and optional displayname, emails. Also returns an access token for the new user. @@ -384,12 +531,12 @@ class ModuleApi: Added in Synapse v0.25.0. Args: - localpart (str): The localpart of the new user. - displayname (str|None): The displayname of the new user. - emails (List[str]): Emails to bind to the new user. + localpart: The localpart of the new user. + displayname: The displayname of the new user. + emails: Emails to bind to the new user. Returns: - Deferred[tuple[str, str]]: a 2-tuple of (user_id, access_token) + a 2-tuple of (user_id, access_token) """ logger.warning( "Using deprecated ModuleApi.register which creates a dummy user device." @@ -399,23 +546,26 @@ class ModuleApi: return user_id, access_token def register_user( - self, localpart, displayname=None, emails: Optional[List[str]] = None - ): + self, + localpart: str, + displayname: Optional[str] = None, + emails: Optional[List[str]] = None, + ) -> "defer.Deferred[str]": """Registers a new user with given localpart and optional displayname, emails. Added in Synapse v1.2.0. Args: - localpart (str): The localpart of the new user. - displayname (str|None): The displayname of the new user. - emails (List[str]): Emails to bind to the new user. + localpart: The localpart of the new user. + displayname: The displayname of the new user. + emails: Emails to bind to the new user. Raises: SynapseError if there is an error performing the registration. 
Check the 'errcode' property for more information on the reason for failure Returns: - defer.Deferred[str]: user_id + user_id """ return defer.ensureDeferred( self._hs.get_registration_handler().register_user( @@ -425,20 +575,25 @@ class ModuleApi: ) ) - def register_device(self, user_id, device_id=None, initial_display_name=None): + def register_device( + self, + user_id: str, + device_id: Optional[str] = None, + initial_display_name: Optional[str] = None, + ) -> "defer.Deferred[Tuple[str, str, Optional[int], Optional[str]]]": """Register a device for a user and generate an access token. Added in Synapse v1.2.0. Args: - user_id (str): full canonical @user:id - device_id (str|None): The device ID to check, or None to generate + user_id: full canonical @user:id + device_id: The device ID to check, or None to generate a new one. - initial_display_name (str|None): An optional display name for the + initial_display_name: An optional display name for the device. Returns: - defer.Deferred[tuple[str, str]]: Tuple of device ID and access token + Tuple of device ID, access token, access token expiration time and refresh token """ return defer.ensureDeferred( self._hs.get_registration_handler().register_device( @@ -492,7 +647,9 @@ class ModuleApi: ) @defer.inlineCallbacks - def invalidate_access_token(self, access_token): + def invalidate_access_token( + self, access_token: str + ) -> Generator["defer.Deferred[Any]", Any, None]: """Invalidate an access token for a user Added in Synapse v0.25.0. @@ -524,14 +681,20 @@ class ModuleApi: self._auth_handler.delete_access_token(access_token) ) - def run_db_interaction(self, desc, func, *args, **kwargs): + def run_db_interaction( + self, + desc: str, + func: Callable[..., T], + *args: Any, + **kwargs: Any, + ) -> "defer.Deferred[T]": """Run a function with a database connection Added in Synapse v0.25.0. Args: - desc (str): description for the transaction, for metrics etc - func (func): function to be run. Passed a database cursor object + desc: description for the transaction, for metrics etc + func: function to be run. Passed a database cursor object as well as *args and **kwargs *args: positional args to be passed to func **kwargs: named args to be passed to func @@ -545,7 +708,7 @@ class ModuleApi: def complete_sso_login( self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str - ): + ) -> None: """Complete a SSO login by redirecting the user to a page to confirm whether they want their access token sent to `client_redirect_url`, or redirect them to that URL with a token directly if the URL matches with one of the whitelisted clients. @@ -575,7 +738,7 @@ class ModuleApi: client_redirect_url: str, new_user: bool = False, auth_provider_id: str = "<unknown>", - ): + ) -> None: """Complete a SSO login by redirecting the user to a page to confirm whether they want their access token sent to `client_redirect_url`, or redirect them to that URL with a token directly if the URL matches with one of the whitelisted clients. @@ -814,11 +977,11 @@ class ModuleApi: self, f: Callable, msec: float, - *args, + *args: object, desc: Optional[str] = None, run_on_all_instances: bool = False, - **kwargs, - ): + **kwargs: object, + ) -> None: """Wraps a function as a background process and calls it repeatedly. 
NOTE: Will only run on the instance that is configured to run @@ -859,13 +1022,18 @@ class ModuleApi: f, ) + async def sleep(self, seconds: float) -> None: + """Sleeps for the given number of seconds.""" + + await self._clock.sleep(seconds) + async def send_mail( self, recipient: str, subject: str, html: str, text: str, - ): + ) -> None: """Send an email on behalf of the homeserver. Added in Synapse v1.39.0. @@ -903,7 +1071,7 @@ class ModuleApi: A list containing the loaded templates, with the orders matching the one of the filenames parameter. """ - return self._hs.config.read_templates( + return self._hs.config.server.read_templates( filenames, (td for td in (self.custom_template_dir, custom_template_directory) if td), ) @@ -1013,6 +1181,26 @@ class ModuleApi: return {key: state_events[event_id] for key, event_id in state_ids.items()} + async def defer_to_thread( + self, + f: Callable[..., T], + *args: Any, + **kwargs: Any, + ) -> T: + """Runs the given function in a separate thread from Synapse's thread pool. + + Added in Synapse v1.49.0. + + Args: + f: The function to run. + args: The function's arguments. + kwargs: The function's keyword arguments. + + Returns: + The return value of the function once ran in a thread. + """ + return await defer_to_thread(self._hs.get_reactor(), f, *args, **kwargs) + class PublicRoomListManager: """Contains methods for adding to, removing from and querying whether a room diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 154e5b7028..7d26954244 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -86,7 +86,7 @@ REQUIREMENTS = [ # We enforce that we have a `cryptography` version that bundles an `openssl` # with the latest security patches. "cryptography>=3.4.7", - "ijson>=3.0", + "ijson>=3.1", ] CONDITIONAL_REQUIREMENTS = { diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py index 8c1bf9227a..fa132d10b4 100644 --- a/synapse/replication/slave/storage/_slaved_id_tracker.py +++ b/synapse/replication/slave/storage/_slaved_id_tracker.py @@ -14,10 +14,18 @@ from typing import List, Optional, Tuple from synapse.storage.database import LoggingDatabaseConnection -from synapse.storage.util.id_generators import _load_current_id +from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id -class SlavedIdTracker: +class SlavedIdTracker(AbstractStreamIdTracker): + """Tracks the "current" stream ID of a stream with a single writer. + + See `AbstractStreamIdTracker` for more details. + + Note that this class does not work correctly when there are multiple + writers. + """ + def __init__( self, db_conn: LoggingDatabaseConnection, @@ -36,17 +44,7 @@ class SlavedIdTracker: self._current = (max if self.step > 0 else min)(self._current, new_id) def get_current_token(self) -> int: - """ - - Returns: - int - """ return self._current def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer. - - For streams with single writers this is equivalent to - `get_current_token`. 
- """ return self.get_current_token() diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 4d5f862862..7541e21de9 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import PushRulesStream from synapse.storage.databases.main.push_rule import PushRulesWorkerStore @@ -25,9 +24,6 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): return self._push_rules_stream_id_gen.get_current_token() def process_replication_rows(self, stream_name, instance_name, token, rows): - # We assert this for the benefit of mypy - assert isinstance(self._push_rules_stream_id_gen, SlavedIdTracker) - if stream_name == PushRulesStream.NAME: self._push_rules_stream_id_gen.advance(instance_name, token) for row in rows: diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index a030e9299e..a390cfcb74 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -14,7 +14,7 @@ # limitations under the License. import heapq from collections.abc import Iterable -from typing import TYPE_CHECKING, List, Optional, Tuple, Type +from typing import TYPE_CHECKING, Optional, Tuple, Type import attr @@ -157,7 +157,7 @@ class EventsStream(Stream): # now we fetch up to that many rows from the events table - event_rows: List[Tuple] = await self._store.get_all_new_forward_event_rows( + event_rows = await self._store.get_all_new_forward_event_rows( instance_name, from_token, current_token, target_row_count ) @@ -191,7 +191,7 @@ class EventsStream(Stream): # finally, fetch the ex-outliers rows. We assume there are few enough of these # not to bother with the limit. - ex_outliers_rows: List[Tuple] = await self._store.get_ex_outlier_stream_rows( + ex_outliers_rows = await self._store.get_ex_outlier_stream_rows( instance_name, from_token, upper_limit ) diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 36cfd1e4e2..d7621981d9 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -28,6 +28,7 @@ from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin from synapse.rest.admin.background_updates import ( BackgroundUpdateEnabledRestServlet, BackgroundUpdateRestServlet, + BackgroundUpdateStartJobRestServlet, ) from synapse.rest.admin.devices import ( DeleteDevicesRestServlet, @@ -46,6 +47,7 @@ from synapse.rest.admin.registration_tokens import ( RegistrationTokenRestServlet, ) from synapse.rest.admin.rooms import ( + BlockRoomRestServlet, DeleteRoomStatusByDeleteIdRestServlet, DeleteRoomStatusByRoomIdRestServlet, ForwardExtremitiesRestServlet, @@ -224,6 +226,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: Register all the admin servlets. 
""" register_servlets_for_client_rest_resource(hs, http_server) + BlockRoomRestServlet(hs).register(http_server) ListRoomRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRestServlet(hs).register(http_server) @@ -261,6 +264,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: SendServerNoticeServlet(hs).register(http_server) BackgroundUpdateEnabledRestServlet(hs).register(http_server) BackgroundUpdateRestServlet(hs).register(http_server) + BackgroundUpdateStartJobRestServlet(hs).register(http_server) def register_servlets_for_client_rest_resource( diff --git a/synapse/rest/admin/background_updates.py b/synapse/rest/admin/background_updates.py index 0d0183bf20..479672d4d5 100644 --- a/synapse/rest/admin/background_updates.py +++ b/synapse/rest/admin/background_updates.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import SynapseError -from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, +) from synapse.http.site import SynapseRequest from synapse.rest.admin._base import admin_patterns, assert_user_is_admin from synapse.types import JsonDict @@ -29,37 +34,36 @@ logger = logging.getLogger(__name__) class BackgroundUpdateEnabledRestServlet(RestServlet): """Allows temporarily disabling background updates""" - PATTERNS = admin_patterns("/background_updates/enabled") + PATTERNS = admin_patterns("/background_updates/enabled$") def __init__(self, hs: "HomeServer"): - self.group_server = hs.get_groups_server_handler() - self.is_mine_id = hs.is_mine_id - self.auth = hs.get_auth() - - self.data_stores = hs.get_datastores() + self._auth = hs.get_auth() + self._data_stores = hs.get_datastores() async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) # We need to check that all configured databases have updates enabled. # (They *should* all be in sync.) 
- enabled = all(db.updates.enabled for db in self.data_stores.databases) + enabled = all(db.updates.enabled for db in self._data_stores.databases) - return 200, {"enabled": enabled} + return HTTPStatus.OK, {"enabled": enabled} async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) body = parse_json_object_from_request(request) enabled = body.get("enabled", True) if not isinstance(enabled, bool): - raise SynapseError(400, "'enabled' parameter must be a boolean") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "'enabled' parameter must be a boolean" + ) - for db in self.data_stores.databases: + for db in self._data_stores.databases: db.updates.enabled = enabled # If we're re-enabling them ensure that we start the background @@ -67,32 +71,29 @@ class BackgroundUpdateEnabledRestServlet(RestServlet): if enabled: db.updates.start_doing_background_updates() - return 200, {"enabled": enabled} + return HTTPStatus.OK, {"enabled": enabled} class BackgroundUpdateRestServlet(RestServlet): """Fetch information about background updates""" - PATTERNS = admin_patterns("/background_updates/status") + PATTERNS = admin_patterns("/background_updates/status$") def __init__(self, hs: "HomeServer"): - self.group_server = hs.get_groups_server_handler() - self.is_mine_id = hs.is_mine_id - self.auth = hs.get_auth() - - self.data_stores = hs.get_datastores() + self._auth = hs.get_auth() + self._data_stores = hs.get_datastores() async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) # We need to check that all configured databases have updates enabled. # (They *should* all be in sync.) 
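# --- Illustrative aside (not part of the diff above) -----------------------
# The new start_job servlet added further down this file queues several
# `background_updates` rows chained together by `depends_on`
# (createtables -> process_rooms -> process_users -> cleanup). A tiny generic
# sketch of resolving such a chain into execution order (this is not the
# scheduler Synapse actually uses, just an illustration of the row shape):
from typing import Dict, List


def order_by_depends_on(jobs: List[dict]) -> List[str]:
    remaining: Dict[str, str] = {j["update_name"]: j.get("depends_on", "") for j in jobs}
    done: List[str] = []
    while remaining:
        # A job is runnable once its dependency (if any) has completed.
        ready = [name for name, dep in remaining.items() if not dep or dep in done]
        if not ready:
            raise ValueError("dependency cycle or missing dependency")
        for name in sorted(ready):
            done.append(name)
            del remaining[name]
    return done


jobs = [
    {"update_name": "populate_user_directory_cleanup",
     "depends_on": "populate_user_directory_process_users"},
    {"update_name": "populate_user_directory_createtables", "depends_on": ""},
    {"update_name": "populate_user_directory_process_users",
     "depends_on": "populate_user_directory_process_rooms"},
    {"update_name": "populate_user_directory_process_rooms",
     "depends_on": "populate_user_directory_createtables"},
]
assert order_by_depends_on(jobs) == [
    "populate_user_directory_createtables",
    "populate_user_directory_process_rooms",
    "populate_user_directory_process_users",
    "populate_user_directory_cleanup",
]
# ---------------------------------------------------------------------------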
- enabled = all(db.updates.enabled for db in self.data_stores.databases) + enabled = all(db.updates.enabled for db in self._data_stores.databases) current_updates = {} - for db in self.data_stores.databases: + for db in self._data_stores.databases: update = db.updates.get_current_update() if not update: continue @@ -104,4 +105,72 @@ class BackgroundUpdateRestServlet(RestServlet): "average_items_per_ms": update.average_items_per_ms(), } - return 200, {"enabled": enabled, "current_updates": current_updates} + return HTTPStatus.OK, {"enabled": enabled, "current_updates": current_updates} + + +class BackgroundUpdateStartJobRestServlet(RestServlet): + """Allows to start specific background updates""" + + PATTERNS = admin_patterns("/background_updates/start_job") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ["job_name"]) + + job_name = body["job_name"] + + if job_name == "populate_stats_process_rooms": + jobs = [ + { + "update_name": "populate_stats_process_rooms", + "progress_json": "{}", + }, + ] + elif job_name == "regenerate_directory": + jobs = [ + { + "update_name": "populate_user_directory_createtables", + "progress_json": "{}", + "depends_on": "", + }, + { + "update_name": "populate_user_directory_process_rooms", + "progress_json": "{}", + "depends_on": "populate_user_directory_createtables", + }, + { + "update_name": "populate_user_directory_process_users", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_rooms", + }, + { + "update_name": "populate_user_directory_cleanup", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_users", + }, + ] + else: + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid job_name") + + try: + await self._store.db_pool.simple_insert_many( + table="background_updates", + values=jobs, + desc=f"admin_api_run_{job_name}", + ) + except self._store.db_pool.engine.module.IntegrityError: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Job %s is already in queue of background updates." % (job_name,), + ) + + self._store.db_pool.updates.start_doing_background_updates() + + return HTTPStatus.OK, {} diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 37cb4d0796..a89dda1ba5 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -448,7 +448,7 @@ class RoomStateRestServlet(RestServlet): now, # We don't bother bundling aggregations in when asked for state # events, as clients won't use them. - bundle_aggregations=False, + bundle_relations=False, ) ret = {"state": room_state} @@ -778,7 +778,70 @@ class RoomEventContextServlet(RestServlet): results["state"], time_now, # No need to bundle aggregations for state events - bundle_aggregations=False, + bundle_relations=False, ) return 200, results + + +class BlockRoomRestServlet(RestServlet): + """ + Manage blocking of rooms. + On PUT: Add or remove a room from blocking list. + On GET: Get blocking status of room and user who has blocked this room. 
+ """ + + PATTERNS = admin_patterns("/rooms/(?P<room_id>[^/]+)/block$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + blocked_by = await self._store.room_is_blocked_by(room_id) + # Test `not None` if `user_id` is an empty string + # if someone add manually an entry in database + if blocked_by is not None: + response = {"block": True, "user_id": blocked_by} + else: + response = {"block": False} + + return HTTPStatus.OK, response + + async def on_PUT( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + content = parse_json_object_from_request(request) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + assert_params_in_dict(content, ["block"]) + block = content.get("block") + if not isinstance(block, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'block' must be a boolean.", + Codes.BAD_JSON, + ) + + if block: + await self._store.block_room(room_id, requester.user.to_string()) + else: + await self._store.unblock_room(room_id) + + return HTTPStatus.OK, {"block": block} diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 23a8bf1fdb..ccd9a2a175 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -898,7 +898,7 @@ class UserTokenRestServlet(RestServlet): if auth_user.to_string() == user_id: raise SynapseError(400, "Cannot use admin API to login as self") - token = await self.auth_handler.get_access_token_for_user_id( + token = await self.auth_handler.create_access_token_for_user_id( user_id=auth_user.to_string(), device_id=None, valid_until_ms=valid_until_ms, diff --git a/synapse/rest/client/_base.py b/synapse/rest/client/_base.py index a0971ce994..b4cb90cb76 100644 --- a/synapse/rest/client/_base.py +++ b/synapse/rest/client/_base.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) def client_patterns( path_regex: str, - releases: Iterable[int] = (0,), + releases: Iterable[str] = ("r0", "v3"), unstable: bool = True, v1: bool = False, ) -> Iterable[Pattern]: @@ -52,7 +52,7 @@ def client_patterns( v1_prefix = CLIENT_API_PREFIX + "/api/v1" patterns.append(re.compile("^" + v1_prefix + path_regex)) for release in releases: - new_prefix = CLIENT_API_PREFIX + "/r%d" % (release,) + new_prefix = CLIENT_API_PREFIX + f"/{release}" patterns.append(re.compile("^" + new_prefix + path_regex)) return patterns diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index 7281b2ee29..730c18f08f 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -262,7 +262,7 @@ class SigningKeyUploadServlet(RestServlet): } """ - PATTERNS = client_patterns("/keys/device_signing/upload$", releases=()) + PATTERNS = client_patterns("/keys/device_signing/upload$", releases=("v3",)) def __init__(self, hs: "HomeServer"): super().__init__() diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index 467444a041..09f378f919 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -14,7 +14,17 @@ import logging import re -from 
typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) from typing_extensions import TypedDict @@ -28,7 +38,6 @@ from synapse.http.server import HttpServer, finish_request from synapse.http.servlet import ( RestServlet, assert_params_in_dict, - parse_boolean, parse_bytes_from_args, parse_json_object_from_request, parse_string, @@ -72,6 +81,7 @@ class LoginRestServlet(RestServlet): # JWT configuration variables. self.jwt_enabled = hs.config.jwt.jwt_enabled self.jwt_secret = hs.config.jwt.jwt_secret + self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim self.jwt_algorithm = hs.config.jwt.jwt_algorithm self.jwt_issuer = hs.config.jwt.jwt_issuer self.jwt_audiences = hs.config.jwt.jwt_audiences @@ -80,7 +90,9 @@ class LoginRestServlet(RestServlet): self.saml2_enabled = hs.config.saml2.saml2_enabled self.cas_enabled = hs.config.cas.cas_enabled self.oidc_enabled = hs.config.oidc.oidc_enabled - self._msc2918_enabled = hs.config.registration.access_token_lifetime is not None + self._msc2918_enabled = ( + hs.config.registration.refreshable_access_token_lifetime is not None + ) self.auth = hs.get_auth() @@ -152,11 +164,14 @@ class LoginRestServlet(RestServlet): login_submission = parse_json_object_from_request(request) if self._msc2918_enabled: - # Check if this login should also issue a refresh token, as per - # MSC2918 - should_issue_refresh_token = parse_boolean( - request, name=LoginRestServlet.REFRESH_TOKEN_PARAM, default=False + # Check if this login should also issue a refresh token, as per MSC2918 + should_issue_refresh_token = login_submission.get( + "org.matrix.msc2918.refresh_token", False ) + if not isinstance(should_issue_refresh_token, bool): + raise SynapseError( + 400, "`org.matrix.msc2918.refresh_token` should be true or false." 
+ ) else: should_issue_refresh_token = False @@ -413,7 +428,7 @@ class LoginRestServlet(RestServlet): errcode=Codes.FORBIDDEN, ) - user = payload.get("sub", None) + user = payload.get(self.jwt_subject_claim, None) if user is None: raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN) @@ -452,7 +467,10 @@ class RefreshTokenServlet(RestServlet): def __init__(self, hs: "HomeServer"): self._auth_handler = hs.get_auth_handler() self._clock = hs.get_clock() - self.access_token_lifetime = hs.config.registration.access_token_lifetime + self.refreshable_access_token_lifetime = ( + hs.config.registration.refreshable_access_token_lifetime + ) + self.refresh_token_lifetime = hs.config.registration.refresh_token_lifetime async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: refresh_submission = parse_json_object_from_request(request) @@ -462,20 +480,33 @@ class RefreshTokenServlet(RestServlet): if not isinstance(token, str): raise SynapseError(400, "Invalid param: refresh_token", Codes.INVALID_PARAM) - valid_until_ms = self._clock.time_msec() + self.access_token_lifetime - access_token, refresh_token = await self._auth_handler.refresh_token( - token, valid_until_ms - ) - expires_in_ms = valid_until_ms - self._clock.time_msec() - return ( - 200, - { - "access_token": access_token, - "refresh_token": refresh_token, - "expires_in_ms": expires_in_ms, - }, + now = self._clock.time_msec() + access_valid_until_ms = None + if self.refreshable_access_token_lifetime is not None: + access_valid_until_ms = now + self.refreshable_access_token_lifetime + refresh_valid_until_ms = None + if self.refresh_token_lifetime is not None: + refresh_valid_until_ms = now + self.refresh_token_lifetime + + ( + access_token, + refresh_token, + actual_access_token_expiry, + ) = await self._auth_handler.refresh_token( + token, access_valid_until_ms, refresh_valid_until_ms ) + response: Dict[str, Union[str, int]] = { + "access_token": access_token, + "refresh_token": refresh_token, + } + + # expires_in_ms is only present if the token expires + if actual_access_token_expiry is not None: + response["expires_in_ms"] = actual_access_token_expiry - now + + return 200, response + class SsoRedirectServlet(RestServlet): PATTERNS = list(client_patterns("/login/(cas|sso)/redirect$", v1=True)) + [ @@ -561,7 +592,7 @@ class CasTicketServlet(RestServlet): def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: LoginRestServlet(hs).register(http_server) - if hs.config.registration.access_token_lifetime is not None: + if hs.config.registration.refreshable_access_token_lifetime is not None: RefreshTokenServlet(hs).register(http_server) SsoRedirectServlet(hs).register(http_server) if hs.config.cas.cas_enabled: diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index bf3cb34146..11fd6cd24d 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -41,7 +41,6 @@ from synapse.http.server import HttpServer, finish_request, respond_with_html from synapse.http.servlet import ( RestServlet, assert_params_in_dict, - parse_boolean, parse_json_object_from_request, parse_string, ) @@ -420,7 +419,9 @@ class RegisterRestServlet(RestServlet): self.password_policy_handler = hs.get_password_policy_handler() self.clock = hs.get_clock() self._registration_enabled = self.hs.config.registration.enable_registration - self._msc2918_enabled = hs.config.registration.access_token_lifetime is not None + self._msc2918_enabled = ( + 
hs.config.registration.refreshable_access_token_lifetime is not None + ) self._registration_flows = _calculate_registration_flows( hs.config, self.auth_handler @@ -447,9 +448,13 @@ class RegisterRestServlet(RestServlet): if self._msc2918_enabled: # Check if this registration should also issue a refresh token, as # per MSC2918 - should_issue_refresh_token = parse_boolean( - request, name="org.matrix.msc2918.refresh_token", default=False + should_issue_refresh_token = body.get( + "org.matrix.msc2918.refresh_token", False ) + if not isinstance(should_issue_refresh_token, bool): + raise SynapseError( + 400, "`org.matrix.msc2918.refresh_token` should be true or false." + ) else: should_issue_refresh_token = False diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index 184cfbe196..45e9f1dd90 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -224,17 +224,17 @@ class RelationPaginationServlet(RestServlet): ) now = self.clock.time_msec() - # We set bundle_aggregations to False when retrieving the original + # We set bundle_relations to False when retrieving the original # event because we want the content before relations were applied to # it. original_event = await self._event_serializer.serialize_event( - event, now, bundle_aggregations=False + event, now, bundle_relations=False ) # Similarly, we don't allow relations to be applied to relations, so we # return the original relations without any aggregations on top of them # here. serialized_events = await self._event_serializer.serialize_events( - events, now, bundle_aggregations=False + events, now, bundle_relations=False ) return_value = pagination_chunk.to_dict() diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 03a353d53c..73d0f7c950 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -719,7 +719,7 @@ class RoomEventContextServlet(RestServlet): results["state"], time_now, # No need to bundle aggregations for state events - bundle_aggregations=False, + bundle_relations=False, ) return 200, results @@ -1138,12 +1138,12 @@ class RoomSpaceSummaryRestServlet(RestServlet): class RoomHierarchyRestServlet(RestServlet): - PATTERNS = ( + PATTERNS = [ re.compile( - "^/_matrix/client/unstable/org.matrix.msc2946" + "^/_matrix/client/(v1|unstable/org.matrix.msc2946)" "/rooms/(?P<room_id>[^/]*)/hierarchy$" ), - ) + ] def __init__(self, hs: "HomeServer"): super().__init__() @@ -1168,7 +1168,7 @@ class RoomHierarchyRestServlet(RestServlet): ) return 200, await self._room_summary_handler.get_room_hierarchy( - requester.user.to_string(), + requester, room_id, suggested_only=parse_boolean(request, "suggested_only", default=False), max_depth=max_depth, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 8c0fdb1940..b6a2485732 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -522,7 +522,7 @@ class SyncRestServlet(RestServlet): time_now=time_now, # We don't bundle "live" events, as otherwise clients # will end up double counting annotations. 
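A quick illustration of the login/registration change above: the MSC2918 opt-in now travels in the JSON body rather than as a query parameter, and must be a boolean. A hedged client-side sketch (the homeserver URL and credentials are invented):

```python
import requests

login_body = {
    "type": "m.login.password",
    "identifier": {"type": "m.id.user", "user": "alice"},
    "password": "an-invented-password",
    # Previously ?org.matrix.msc2918.refresh_token=true; now a body field, and the
    # servlet rejects anything that is not a JSON boolean with a 400.
    "org.matrix.msc2918.refresh_token": True,
}

resp = requests.post(
    "https://homeserver.example.com/_matrix/client/r0/login", json=login_body
).json()

access_token = resp["access_token"]
refresh_token = resp.get("refresh_token")  # present when a refresh token was issued
expires_in_ms = resp.get("expires_in_ms")  # only present if the access token expires
```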
- bundle_aggregations=False, + bundle_relations=False, token_id=token_id, event_format=event_formatter, only_event_fields=only_fields, diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 014fa893d6..9b40fd8a6c 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -29,7 +29,7 @@ from synapse.api.errors import Codes, SynapseError, cs_error from synapse.http.server import finish_request, respond_with_json from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable -from synapse.util.stringutils import is_ascii +from synapse.util.stringutils import is_ascii, parse_and_validate_server_name logger = logging.getLogger(__name__) @@ -51,6 +51,19 @@ TEXT_CONTENT_TYPES = [ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: + """Parses the server name, media ID and optional file name from the request URI + + Also performs some rough validation on the server name. + + Args: + request: The `Request`. + + Returns: + A tuple containing the parsed server name, media ID and optional file name. + + Raises: + SynapseError(404): if parsing or validation fail for any reason + """ try: # The type on postpath seems incorrect in Twisted 21.2.0. postpath: List[bytes] = request.postpath # type: ignore @@ -62,6 +75,9 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: server_name = server_name_bytes.decode("utf-8") media_id = media_id_bytes.decode("utf8") + # Validate the server name, raising if invalid + parse_and_validate_server_name(server_name) + file_name = None if len(postpath) > 2: try: diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index bec77088ee..c0e15c6513 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -16,7 +16,8 @@ import functools import os import re -from typing import Any, Callable, List, TypeVar, cast +import string +from typing import Any, Callable, List, TypeVar, Union, cast NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") @@ -37,6 +38,85 @@ def _wrap_in_base_path(func: F) -> F: return cast(F, _wrapped) +GetPathMethod = TypeVar( + "GetPathMethod", bound=Union[Callable[..., str], Callable[..., List[str]]] +) + + +def _wrap_with_jail_check(func: GetPathMethod) -> GetPathMethod: + """Wraps a path-returning method to check that the returned path(s) do not escape + the media store directory. + + The check is not expected to ever fail, unless `func` is missing a call to + `_validate_path_component`, or `_validate_path_component` is buggy. + + Args: + func: The `MediaFilePaths` method to wrap. The method may return either a single + path, or a list of paths. Returned paths may be either absolute or relative. + + Returns: + The method, wrapped with a check to ensure that the returned path(s) lie within + the media store directory. Raises a `ValueError` if the check fails. + """ + + @functools.wraps(func) + def _wrapped( + self: "MediaFilePaths", *args: Any, **kwargs: Any + ) -> Union[str, List[str]]: + path_or_paths = func(self, *args, **kwargs) + + if isinstance(path_or_paths, list): + paths_to_check = path_or_paths + else: + paths_to_check = [path_or_paths] + + for path in paths_to_check: + # path may be an absolute or relative path, depending on the method being + # wrapped. When "appending" an absolute path, `os.path.join` discards the + # previous path, which is desired here. 
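The wrapper being added here relies on a normalise-then-compare check to keep generated paths inside the media store. A self-contained sketch of the same technique, with a made-up base directory:

```python
import os

MEDIA_STORE = os.path.realpath("/var/lib/synapse/media_store")  # illustrative base directory

def assert_inside_media_store(path: str) -> str:
    """Raise ValueError if the path would escape the media store directory."""
    # os.path.join discards MEDIA_STORE if `path` is absolute, which is exactly
    # what we want when the wrapped method returns absolute paths.
    candidate = os.path.normpath(os.path.join(MEDIA_STORE, path))
    if os.path.commonpath([candidate, MEDIA_STORE]) != MEDIA_STORE:
        raise ValueError(f"Invalid media store path: {path!r}")
    return candidate

assert_inside_media_store("local_content/ab/cd/efgh")  # fine

try:
    assert_inside_media_store("local_content/../../etc/passwd")
except ValueError as e:
    print(e)  # Invalid media store path: 'local_content/../../etc/passwd'
```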
+ normalized_path = os.path.normpath(os.path.join(self.real_base_path, path)) + if ( + os.path.commonpath([normalized_path, self.real_base_path]) + != self.real_base_path + ): + raise ValueError(f"Invalid media store path: {path!r}") + + return path_or_paths + + return cast(GetPathMethod, _wrapped) + + +ALLOWED_CHARACTERS = set( + string.ascii_letters + + string.digits + + "_-" + + ".[]:" # Domain names, IPv6 addresses and ports in server names +) +FORBIDDEN_NAMES = { + "", + os.path.curdir, # "." for the current platform + os.path.pardir, # ".." for the current platform +} + + +def _validate_path_component(name: str) -> str: + """Checks that the given string can be safely used as a path component + + Args: + name: The path component to check. + + Returns: + The path component if valid. + + Raises: + ValueError: If `name` cannot be safely used as a path component. + """ + if not ALLOWED_CHARACTERS.issuperset(name) or name in FORBIDDEN_NAMES: + raise ValueError(f"Invalid path component: {name!r}") + + return name + + class MediaFilePaths: """Describes where files are stored on disk. @@ -48,22 +128,46 @@ class MediaFilePaths: def __init__(self, primary_base_path: str): self.base_path = primary_base_path + # The media store directory, with all symlinks resolved. + self.real_base_path = os.path.realpath(primary_base_path) + + # Refuse to initialize if paths cannot be validated correctly for the current + # platform. + assert os.path.sep not in ALLOWED_CHARACTERS + assert os.path.altsep not in ALLOWED_CHARACTERS + # On Windows, paths have all sorts of weirdness which `_validate_path_component` + # does not consider. In any case, the remote media store can't work correctly + # for certain homeservers there, since ":"s aren't allowed in paths. + assert os.name == "posix" + + @_wrap_with_jail_check def local_media_filepath_rel(self, media_id: str) -> str: - return os.path.join("local_content", media_id[0:2], media_id[2:4], media_id[4:]) + return os.path.join( + "local_content", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) + @_wrap_with_jail_check def local_media_thumbnail_rel( self, media_id: str, width: int, height: int, content_type: str, method: str ) -> str: top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) return os.path.join( - "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name + "local_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + _validate_path_component(file_name), ) local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + @_wrap_with_jail_check def local_media_thumbnail_dir(self, media_id: str) -> str: """ Retrieve the local store path of thumbnails of a given media_id @@ -76,18 +180,24 @@ class MediaFilePaths: return os.path.join( self.base_path, "local_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ) + @_wrap_with_jail_check def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: return os.path.join( - "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] + "remote_content", + _validate_path_component(server_name), + 
_validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), ) remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) + @_wrap_with_jail_check def remote_media_thumbnail_rel( self, server_name: str, @@ -101,11 +211,11 @@ class MediaFilePaths: file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) return os.path.join( "remote_thumbnail", - server_name, - file_id[0:2], - file_id[2:4], - file_id[4:], - file_name, + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + _validate_path_component(file_name), ) remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel) @@ -113,6 +223,7 @@ class MediaFilePaths: # Legacy path that was used to store thumbnails previously. # Should be removed after some time, when most of the thumbnails are stored # using the new path. + @_wrap_with_jail_check def remote_media_thumbnail_rel_legacy( self, server_name: str, file_id: str, width: int, height: int, content_type: str ) -> str: @@ -120,43 +231,66 @@ class MediaFilePaths: file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( "remote_thumbnail", - server_name, - file_id[0:2], - file_id[2:4], - file_id[4:], - file_name, + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + _validate_path_component(file_name), ) def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: return os.path.join( self.base_path, "remote_thumbnail", - server_name, - file_id[0:2], - file_id[2:4], - file_id[4:], + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), ) + @_wrap_with_jail_check def url_cache_filepath_rel(self, media_id: str) -> str: if NEW_FORMAT_ID_RE.match(media_id): # Media id is of the form <DATE><RANDOM_STRING> # E.g.: 2017-09-28-fsdRDt24DS234dsf - return os.path.join("url_cache", media_id[:10], media_id[11:]) + return os.path.join( + "url_cache", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ) else: - return os.path.join("url_cache", media_id[0:2], media_id[2:4], media_id[4:]) + return os.path.join( + "url_cache", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) + @_wrap_with_jail_check def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id file" if NEW_FORMAT_ID_RE.match(media_id): - return [os.path.join(self.base_path, "url_cache", media_id[:10])] + return [ + os.path.join( + self.base_path, "url_cache", _validate_path_component(media_id[:10]) + ) + ] else: return [ - os.path.join(self.base_path, "url_cache", media_id[0:2], media_id[2:4]), - os.path.join(self.base_path, "url_cache", media_id[0:2]), + os.path.join( + self.base_path, + "url_cache", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + ), + os.path.join( + self.base_path, "url_cache", _validate_path_component(media_id[0:2]) + ), ] + @_wrap_with_jail_check def url_cache_thumbnail_rel( self, media_id: str, width: int, height: int, content_type: str, method: str ) -> 
str: @@ -168,37 +302,46 @@ class MediaFilePaths: if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - "url_cache_thumbnails", media_id[:10], media_id[11:], file_name + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + _validate_path_component(file_name), ) else: return os.path.join( "url_cache_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], - file_name, + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + _validate_path_component(file_name), ) url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) + @_wrap_with_jail_check def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: # Media id is of the form <DATE><RANDOM_STRING> # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): - return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:]) + return os.path.join( + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ) else: return os.path.join( "url_cache_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ) url_cache_thumbnail_directory = _wrap_in_base_path( url_cache_thumbnail_directory_rel ) + @_wrap_with_jail_check def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form <DATE><RANDOM_STRING> @@ -206,21 +349,35 @@ class MediaFilePaths: if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:] + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), ), - os.path.join(self.base_path, "url_cache_thumbnails", media_id[:10]), ] else: return [ os.path.join( self.base_path, "url_cache_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ), os.path.join( - self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4] + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), ), - os.path.join(self.base_path, "url_cache_thumbnails", media_id[0:2]), ] diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 1605411b00..446204dbe5 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -764,7 +764,7 @@ class StateResolutionStore: store: "DataStore" def get_events( - self, event_ids: Iterable[str], allow_rejected: bool = False + self, event_ids: Collection[str], allow_rejected: bool = False ) -> Awaitable[Dict[str, EventBase]]: """Get events from the database diff --git a/synapse/state/v1.py b/synapse/state/v1.py index 6edadea550..499a328201 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py @@ -17,6 +17,7 @@ import logging from typing import ( Awaitable, Callable, + Collection, Dict, Iterable, List, @@ -44,7 +45,7 @@ async def resolve_events_with_store( room_version: 
RoomVersion, state_sets: Sequence[StateMap[str]], event_map: Optional[Dict[str, EventBase]], - state_map_factory: Callable[[Iterable[str]], Awaitable[Dict[str, EventBase]]], + state_map_factory: Callable[[Collection[str]], Awaitable[Dict[str, EventBase]]], ) -> StateMap[str]: """ Args: diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 0623da9aa1..3056e64ff5 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -21,7 +21,7 @@ from synapse.storage.database import LoggingTransaction # noqa: F401 from synapse.storage.database import make_in_list_sql_clause # noqa: F401 from synapse.storage.database import DatabasePool from synapse.storage.types import Connection -from synapse.types import StreamToken, get_domain_from_id +from synapse.types import get_domain_from_id from synapse.util import json_decoder if TYPE_CHECKING: @@ -48,7 +48,7 @@ class SQLBaseStore(metaclass=ABCMeta): self, stream_name: str, instance_name: str, - token: StreamToken, + token: int, rows: Iterable[Any], ) -> None: pass diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index b9a8ca997e..d64910aded 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -12,12 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Iterable, Optional +from typing import ( + TYPE_CHECKING, + AsyncContextManager, + Awaitable, + Callable, + Dict, + Iterable, + Optional, +) + +import attr from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.types import Connection from synapse.types import JsonDict -from synapse.util import json_encoder +from synapse.util import Clock, json_encoder from . import engines @@ -28,6 +38,45 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +ON_UPDATE_CALLBACK = Callable[[str, str, bool], AsyncContextManager[int]] +DEFAULT_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]] +MIN_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]] + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _BackgroundUpdateHandler: + """A handler for a given background update. + + Attributes: + callback: The function to call to make progress on the background + update. + oneshot: Wether the update is likely to happen all in one go, ignoring + the supplied target duration, e.g. index creation. This is used by + the update controller to help correctly schedule the update. + """ + + callback: Callable[[JsonDict, int], Awaitable[int]] + oneshot: bool = False + + +class _BackgroundUpdateContextManager: + BACKGROUND_UPDATE_INTERVAL_MS = 1000 + BACKGROUND_UPDATE_DURATION_MS = 100 + + def __init__(self, sleep: bool, clock: Clock): + self._sleep = sleep + self._clock = clock + + async def __aenter__(self) -> int: + if self._sleep: + await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000) + + return self.BACKGROUND_UPDATE_DURATION_MS + + async def __aexit__(self, *exc) -> None: + pass + + class BackgroundUpdatePerformance: """Tracks the how long a background update is taking to update its items""" @@ -82,22 +131,24 @@ class BackgroundUpdater: process and autotuning the batch size. 
""" - MINIMUM_BACKGROUND_BATCH_SIZE = 100 + MINIMUM_BACKGROUND_BATCH_SIZE = 1 DEFAULT_BACKGROUND_BATCH_SIZE = 100 - BACKGROUND_UPDATE_INTERVAL_MS = 1000 - BACKGROUND_UPDATE_DURATION_MS = 100 def __init__(self, hs: "HomeServer", database: "DatabasePool"): self._clock = hs.get_clock() self.db_pool = database + self._database_name = database.name() + # if a background update is currently running, its name. self._current_background_update: Optional[str] = None + self._on_update_callback: Optional[ON_UPDATE_CALLBACK] = None + self._default_batch_size_callback: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None + self._min_batch_size_callback: Optional[MIN_BATCH_SIZE_CALLBACK] = None + self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {} - self._background_update_handlers: Dict[ - str, Callable[[JsonDict, int], Awaitable[int]] - ] = {} + self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {} self._all_done = False # Whether we're currently running updates @@ -107,6 +158,83 @@ class BackgroundUpdater: # enable/disable background updates via the admin API. self.enabled = True + def register_update_controller_callbacks( + self, + on_update: ON_UPDATE_CALLBACK, + default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + min_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + ) -> None: + """Register callbacks from a module for each hook.""" + if self._on_update_callback is not None: + logger.warning( + "More than one module tried to register callbacks for controlling" + " background updates. Only the callbacks registered by the first module" + " (in order of appearance in Synapse's configuration file) that tried to" + " do so will be called." + ) + + return + + self._on_update_callback = on_update + + if default_batch_size is not None: + self._default_batch_size_callback = default_batch_size + + if min_batch_size is not None: + self._min_batch_size_callback = min_batch_size + + def _get_context_manager_for_update( + self, + sleep: bool, + update_name: str, + database_name: str, + oneshot: bool, + ) -> AsyncContextManager[int]: + """Get a context manager to run a background update with. + + If a module has registered a `update_handler` callback, use the context manager + it returns. + + Otherwise, returns a context manager that will return a default value, optionally + sleeping if needed. + + Args: + sleep: Whether we can sleep between updates. + update_name: The name of the update. + database_name: The name of the database the update is being run on. + oneshot: Whether the update will complete all in one go, e.g. index creation. + In such cases the returned target duration is ignored. + + Returns: + The target duration in milliseconds that the background update should run for. + + Note: this is a *target*, and an iteration may take substantially longer or + shorter. + """ + if self._on_update_callback is not None: + return self._on_update_callback(update_name, database_name, oneshot) + + return _BackgroundUpdateContextManager(sleep, self._clock) + + async def _default_batch_size(self, update_name: str, database_name: str) -> int: + """The batch size to use for the first iteration of a new background + update. + """ + if self._default_batch_size_callback is not None: + return await self._default_batch_size_callback(update_name, database_name) + + return self.DEFAULT_BACKGROUND_BATCH_SIZE + + async def _min_batch_size(self, update_name: str, database_name: str) -> int: + """A lower bound on the batch size of a new background update. 
+ + Used to ensure that progress is always made. Must be greater than 0. + """ + if self._min_batch_size_callback is not None: + return await self._min_batch_size_callback(update_name, database_name) + + return self.MINIMUM_BACKGROUND_BATCH_SIZE + def get_current_update(self) -> Optional[BackgroundUpdatePerformance]: """Returns the current background update, if any.""" @@ -122,6 +250,8 @@ class BackgroundUpdater: def start_doing_background_updates(self) -> None: if self.enabled: + # if we start a new background update, not all updates are done. + self._all_done = False run_as_background_process("background_updates", self.run_background_updates) async def run_background_updates(self, sleep: bool = True) -> None: @@ -133,13 +263,8 @@ class BackgroundUpdater: try: logger.info("Starting background schema updates") while self.enabled: - if sleep: - await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.0) - try: - result = await self.do_next_background_update( - self.BACKGROUND_UPDATE_DURATION_MS - ) + result = await self.do_next_background_update(sleep) except Exception: logger.exception("Error doing update") else: @@ -201,13 +326,15 @@ class BackgroundUpdater: return not update_exists - async def do_next_background_update(self, desired_duration_ms: float) -> bool: + async def do_next_background_update(self, sleep: bool = True) -> bool: """Does some amount of work on the next queued background update Returns once some amount of work is done. Args: - desired_duration_ms: How long we want to spend updating. + sleep: Whether to limit how quickly we run background updates or + not. + Returns: True if we have finished running all the background updates, otherwise False """ @@ -250,7 +377,19 @@ class BackgroundUpdater: self._current_background_update = upd["update_name"] - await self._do_background_update(desired_duration_ms) + # We have a background update to run, otherwise we would have returned + # early. 
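For the new pluggable controller, `register_update_controller_callbacks` expects an `on_update` hook returning an async context manager that yields the target batch duration in milliseconds (the `ON_UPDATE_CALLBACK` alias above). A minimal sketch of such a hook; the pacing numbers are invented:

```python
import contextlib
from typing import AsyncIterator


@contextlib.asynccontextmanager
async def on_update(update_name: str, database_name: str, oneshot: bool) -> AsyncIterator[int]:
    # Index-creation style updates (oneshot=True) ignore the yielded target
    # duration, per the docstring of _get_context_manager_for_update.
    if oneshot:
        yield 0
        return

    # Otherwise ask Synapse to aim for roughly 500ms of work per iteration.
    # Sleeping, logging or rate limiting could happen here, before yielding.
    yield 500
```

Such a hook would be handed to `register_update_controller_callbacks`; the optional `default_batch_size` and `min_batch_size` callbacks fall back to `DEFAULT_BACKGROUND_BATCH_SIZE` and the new `MINIMUM_BACKGROUND_BATCH_SIZE` of 1 when not supplied.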
+ assert self._current_background_update is not None + update_info = self._background_update_handlers[self._current_background_update] + + async with self._get_context_manager_for_update( + sleep=sleep, + update_name=self._current_background_update, + database_name=self._database_name, + oneshot=update_info.oneshot, + ) as desired_duration_ms: + await self._do_background_update(desired_duration_ms) + return False async def _do_background_update(self, desired_duration_ms: float) -> int: @@ -258,7 +397,7 @@ class BackgroundUpdater: update_name = self._current_background_update logger.info("Starting update batch on background update '%s'", update_name) - update_handler = self._background_update_handlers[update_name] + update_handler = self._background_update_handlers[update_name].callback performance = self._background_update_performance.get(update_name) @@ -271,9 +410,14 @@ class BackgroundUpdater: if items_per_ms is not None: batch_size = int(desired_duration_ms * items_per_ms) # Clamp the batch size so that we always make progress - batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE) + batch_size = max( + batch_size, + await self._min_batch_size(update_name, self._database_name), + ) else: - batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE + batch_size = await self._default_batch_size( + update_name, self._database_name + ) progress_json = await self.db_pool.simple_select_one_onecol( "background_updates", @@ -292,6 +436,8 @@ class BackgroundUpdater: duration_ms = time_stop - time_start + performance.update(items_updated, duration_ms) + logger.info( "Running background update %r. Processed %r items in %rms." " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)", @@ -304,8 +450,6 @@ class BackgroundUpdater: batch_size, ) - performance.update(items_updated, duration_ms) - return len(self._background_update_performance) def register_background_update_handler( @@ -329,7 +473,9 @@ class BackgroundUpdater: update_name: The name of the update that this code handles. update_handler: The function that does the update. """ - self._background_update_handlers[update_name] = update_handler + self._background_update_handlers[update_name] = _BackgroundUpdateHandler( + update_handler + ) def register_noop_background_update(self, update_name: str) -> None: """Register a noop handler for a background update. @@ -451,7 +597,9 @@ class BackgroundUpdater: await self._end_background_update(update_name) return 1 - self.register_background_update_handler(update_name, updater) + self._background_update_handlers[update_name] = _BackgroundUpdateHandler( + updater, oneshot=True + ) async def _end_background_update(self, update_name: str) -> None: """Removes a completed background update task from the queue. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index d4cab69ebf..0693d39006 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -188,7 +188,7 @@ class LoggingDatabaseConnection: # The type of entry which goes on our after_callbacks and exception_callbacks lists. 
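The batch-size autotuning in `_do_background_update` above is easiest to see with numbers; a worked sketch with invented rates:

```python
# Suppose previous iterations averaged 0.3 items per millisecond and the
# controller asked for a 100ms target duration.
items_per_ms = 0.3
desired_duration_ms = 100
min_batch_size = 1  # the new MINIMUM_BACKGROUND_BATCH_SIZE default

batch_size = int(desired_duration_ms * items_per_ms)  # 30 items
batch_size = max(batch_size, min_batch_size)          # still 30; the clamp is a no-op here

# With a very slow update (0.004 items/ms) the clamp is what keeps progress happening:
batch_size = max(int(100 * 0.004), min_batch_size)    # int(0.4) == 0, clamped to 1
```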
-_CallbackListEntry = Tuple[Callable[..., None], Iterable[Any], Dict[str, Any]] +_CallbackListEntry = Tuple[Callable[..., object], Iterable[Any], Dict[str, Any]] R = TypeVar("R") @@ -235,7 +235,7 @@ class LoggingTransaction: self.after_callbacks = after_callbacks self.exception_callbacks = exception_callbacks - def call_after(self, callback: Callable[..., None], *args: Any, **kwargs: Any): + def call_after(self, callback: Callable[..., object], *args: Any, **kwargs: Any): """Call the given callback on the main twisted thread after the transaction has finished. Used to invalidate the caches on the correct thread. @@ -247,7 +247,7 @@ class LoggingTransaction: self.after_callbacks.append((callback, args, kwargs)) def call_on_exception( - self, callback: Callable[..., None], *args: Any, **kwargs: Any + self, callback: Callable[..., object], *args: Any, **kwargs: Any ): # if self.exception_callbacks is None, that means that whatever constructed the # LoggingTransaction isn't expecting there to be any callbacks; assert that diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 7c0f953365..ab8766c75b 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -599,6 +599,7 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox" REMOVE_HIDDEN_DEVICES = "remove_hidden_devices_from_device_inbox" + REMOVE_DEAD_DEVICES_FROM_INBOX = "remove_dead_devices_from_device_inbox" def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) @@ -614,14 +615,18 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) - self.db_pool.updates.register_background_update_handler( - self.REMOVE_DELETED_DEVICES, - self._remove_deleted_devices_from_device_inbox, + # Used to be a background update that deletes all device_inboxes for deleted + # devices. + self.db_pool.updates.register_noop_background_update( + self.REMOVE_DELETED_DEVICES ) + # Used to be a background update that deletes all device_inboxes for hidden + # devices. + self.db_pool.updates.register_noop_background_update(self.REMOVE_HIDDEN_DEVICES) self.db_pool.updates.register_background_update_handler( - self.REMOVE_HIDDEN_DEVICES, - self._remove_hidden_devices_from_device_inbox, + self.REMOVE_DEAD_DEVICES_FROM_INBOX, + self._remove_dead_devices_from_device_inbox, ) async def _background_drop_index_device_inbox(self, progress, batch_size): @@ -636,171 +641,83 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): return 1 - async def _remove_deleted_devices_from_device_inbox( - self, progress: JsonDict, batch_size: int + async def _remove_dead_devices_from_device_inbox( + self, + progress: JsonDict, + batch_size: int, ) -> int: - """A background update that deletes all device_inboxes for deleted devices. - - This should only need to be run once (when users upgrade to v1.47.0) + """A background update to remove devices that were either deleted or hidden from + the device_inbox table. Args: - progress: JsonDict used to store progress of this background update - batch_size: the maximum number of rows to retrieve in a single select query + progress: The update's progress dict. + batch_size: The batch size for this update. Returns: - The number of deleted rows + The number of rows deleted. 
""" - def _remove_deleted_devices_from_device_inbox_txn( + def _remove_dead_devices_from_device_inbox_txn( txn: LoggingTransaction, - ) -> int: - """stream_id is not unique - we need to use an inclusive `stream_id >= ?` clause, - since we might not have deleted all dead device messages for the stream_id - returned from the previous query + ) -> Tuple[int, bool]: - Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, - to avoid problems of deleting a large number of rows all at once - due to a single device having lots of device messages. - """ + if "max_stream_id" in progress: + max_stream_id = progress["max_stream_id"] + else: + txn.execute("SELECT max(stream_id) FROM device_inbox") + # There's a type mismatch here between how we want to type the row and + # what fetchone says it returns, but we silence it because we know that + # res can't be None. + res: Tuple[Optional[int]] = txn.fetchone() # type: ignore[assignment] + if res[0] is None: + # this can only happen if the `device_inbox` table is empty, in which + # case we have no work to do. + return 0, True + else: + max_stream_id = res[0] - last_stream_id = progress.get("stream_id", 0) + start = progress.get("stream_id", 0) + stop = start + batch_size + # delete rows in `device_inbox` which do *not* correspond to a known, + # unhidden device. sql = """ - SELECT device_id, user_id, stream_id - FROM device_inbox + DELETE FROM device_inbox WHERE - stream_id >= ? - AND (device_id, user_id) NOT IN ( - SELECT device_id, user_id FROM devices + stream_id >= ? AND stream_id < ? + AND NOT EXISTS ( + SELECT * FROM devices d + WHERE + d.device_id=device_inbox.device_id + AND d.user_id=device_inbox.user_id + AND NOT hidden ) - ORDER BY stream_id - LIMIT ? - """ - - txn.execute(sql, (last_stream_id, batch_size)) - rows = txn.fetchall() + """ - num_deleted = 0 - for row in rows: - num_deleted += self.db_pool.simple_delete_txn( - txn, - "device_inbox", - {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, - ) + txn.execute(sql, (start, stop)) - if rows: - # send more than stream_id to progress - # otherwise it can happen in large deployments that - # no change of status is visible in the log file - # it may be that the stream_id does not change in several runs - self.db_pool.updates._background_update_progress_txn( - txn, - self.REMOVE_DELETED_DEVICES, - { - "device_id": rows[-1][0], - "user_id": rows[-1][1], - "stream_id": rows[-1][2], - }, - ) - - return num_deleted - - number_deleted = await self.db_pool.runInteraction( - "_remove_deleted_devices_from_device_inbox", - _remove_deleted_devices_from_device_inbox_txn, - ) - - # The task is finished when no more lines are deleted. - if not number_deleted: - await self.db_pool.updates._end_background_update( - self.REMOVE_DELETED_DEVICES + self.db_pool.updates._background_update_progress_txn( + txn, + self.REMOVE_DEAD_DEVICES_FROM_INBOX, + { + "stream_id": stop, + "max_stream_id": max_stream_id, + }, ) - return number_deleted - - async def _remove_hidden_devices_from_device_inbox( - self, progress: JsonDict, batch_size: int - ) -> int: - """A background update that deletes all device_inboxes for hidden devices. 
- - This should only need to be run once (when users upgrade to v1.47.0) - - Args: - progress: JsonDict used to store progress of this background update - batch_size: the maximum number of rows to retrieve in a single select query - - Returns: - The number of deleted rows - """ - - def _remove_hidden_devices_from_device_inbox_txn( - txn: LoggingTransaction, - ) -> int: - """stream_id is not unique - we need to use an inclusive `stream_id >= ?` clause, - since we might not have deleted all hidden device messages for the stream_id - returned from the previous query - - Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, - to avoid problems of deleting a large number of rows all at once - due to a single device having lots of device messages. - """ - - last_stream_id = progress.get("stream_id", 0) - - sql = """ - SELECT device_id, user_id, stream_id - FROM device_inbox - WHERE - stream_id >= ? - AND (device_id, user_id) IN ( - SELECT device_id, user_id FROM devices WHERE hidden = ? - ) - ORDER BY stream_id - LIMIT ? - """ - - txn.execute(sql, (last_stream_id, True, batch_size)) - rows = txn.fetchall() - - num_deleted = 0 - for row in rows: - num_deleted += self.db_pool.simple_delete_txn( - txn, - "device_inbox", - {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, - ) - - if rows: - # We don't just save the `stream_id` in progress as - # otherwise it can happen in large deployments that - # no change of status is visible in the log file, as - # it may be that the stream_id does not change in several runs - self.db_pool.updates._background_update_progress_txn( - txn, - self.REMOVE_HIDDEN_DEVICES, - { - "device_id": rows[-1][0], - "user_id": rows[-1][1], - "stream_id": rows[-1][2], - }, - ) - - return num_deleted + return stop > max_stream_id - number_deleted = await self.db_pool.runInteraction( - "_remove_hidden_devices_from_device_inbox", - _remove_hidden_devices_from_device_inbox_txn, + finished = await self.db_pool.runInteraction( + "_remove_devices_from_device_inbox_txn", + _remove_dead_devices_from_device_inbox_txn, ) - # The task is finished when no more lines are deleted. - if not number_deleted: + if finished: await self.db_pool.updates._end_background_update( - self.REMOVE_HIDDEN_DEVICES + self.REMOVE_DEAD_DEVICES_FROM_INBOX, ) - return number_deleted + return batch_size class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index a95ac34f09..b06c1dc45b 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -408,29 +408,58 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore): fallback_keys: the keys to set. This is a map from key ID (which is of the form "algorithm:id") to key data. 
""" + await self.db_pool.runInteraction( + "set_e2e_fallback_keys_txn", + self._set_e2e_fallback_keys_txn, + user_id, + device_id, + fallback_keys, + ) + + await self.invalidate_cache_and_stream( + "get_e2e_unused_fallback_key_types", (user_id, device_id) + ) + + def _set_e2e_fallback_keys_txn( + self, txn: Connection, user_id: str, device_id: str, fallback_keys: JsonDict + ) -> None: # fallback_keys will usually only have one item in it, so using a for # loop (as opposed to calling simple_upsert_many_txn) won't be too bad # FIXME: make sure that only one key per algorithm is uploaded for key_id, fallback_key in fallback_keys.items(): algorithm, key_id = key_id.split(":", 1) - await self.db_pool.simple_upsert( - "e2e_fallback_keys_json", + old_key_json = self.db_pool.simple_select_one_onecol_txn( + txn, + table="e2e_fallback_keys_json", keyvalues={ "user_id": user_id, "device_id": device_id, "algorithm": algorithm, }, - values={ - "key_id": key_id, - "key_json": json_encoder.encode(fallback_key), - "used": False, - }, - desc="set_e2e_fallback_key", + retcol="key_json", + allow_none=True, ) - await self.invalidate_cache_and_stream( - "get_e2e_unused_fallback_key_types", (user_id, device_id) - ) + new_key_json = encode_canonical_json(fallback_key).decode("utf-8") + + # If the uploaded key is the same as the current fallback key, + # don't do anything. This prevents marking the key as unused if it + # was already used. + if old_key_json != new_key_json: + self.db_pool.simple_upsert_txn( + txn, + table="e2e_fallback_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + "algorithm": algorithm, + }, + values={ + "key_id": key_id, + "key_json": json_encoder.encode(fallback_key), + "used": False, + }, + ) @cached(max_entries=10000) async def get_e2e_unused_fallback_key_types( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 120e4807d1..c3440de2cb 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1,6 +1,6 @@ # Copyright 2014-2016 OpenMarket Ltd # Copyright 2018-2019 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ # limitations under the License. 
import itertools import logging -from collections import OrderedDict, namedtuple +from collections import OrderedDict from typing import ( TYPE_CHECKING, Any, @@ -41,9 +41,10 @@ from synapse.events.snapshot import EventContext # noqa: F401 from synapse.logging.utils import log_function from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.databases.main.events_worker import EventCacheEntry from synapse.storage.databases.main.search import SearchEntry from synapse.storage.types import Connection -from synapse.storage.util.id_generators import MultiWriterIdGenerator +from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator from synapse.types import StateMap, get_domain_from_id from synapse.util import json_encoder @@ -64,9 +65,6 @@ event_counter = Counter( ) -_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) - - @attr.s(slots=True) class DeltaState: """Deltas to use to update the `current_state_events` table. @@ -108,16 +106,21 @@ class PersistEventsStore: self._ephemeral_messages_enabled = hs.config.server.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id - # Ideally we'd move these ID gens here, unfortunately some other ID - # generators are chained off them so doing so is a bit of a PITA. - self._backfill_id_gen: MultiWriterIdGenerator = self.store._backfill_id_gen - self._stream_id_gen: MultiWriterIdGenerator = self.store._stream_id_gen - # This should only exist on instances that are configured to write assert ( hs.get_instance_name() in hs.config.worker.writers.events ), "Can only instantiate EventsStore on master" + # Since we have been configured to write, we ought to have id generators, + # rather than id trackers. + assert isinstance(self.store._backfill_id_gen, AbstractStreamIdGenerator) + assert isinstance(self.store._stream_id_gen, AbstractStreamIdGenerator) + + # Ideally we'd move these ID gens here, unfortunately some other ID + # generators are chained off them so doing so is a bit of a PITA. + self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen + self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen + async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], @@ -1553,11 +1556,13 @@ class PersistEventsStore: for row in rows: event = ev_map[row["event_id"]] if not row["rejects"] and not row["redacts"]: - to_prefill.append(_EventCacheEntry(event=event, redacted_event=None)) + to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) def prefill(): for cache_entry in to_prefill: - self.store._get_event_cache.set((cache_entry[0].event_id,), cache_entry) + self.store._get_event_cache.set( + (cache_entry.event.event_id,), cache_entry + ) txn.call_after(prefill) @@ -1696,34 +1701,33 @@ class PersistEventsStore: }, ) - def _handle_event_relations(self, txn, event): - """Handles inserting relation data during peristence of events + def _handle_event_relations( + self, txn: LoggingTransaction, event: EventBase + ) -> None: + """Handles inserting relation data during persistence of events Args: - txn - event (EventBase) + txn: The current database transaction. + event: The event which might have relations. """ relation = event.content.get("m.relates_to") if not relation: # No relations return + # Relations must have a type and parent event ID. 
rel_type = relation.get("rel_type") - if rel_type not in ( - RelationTypes.ANNOTATION, - RelationTypes.REFERENCE, - RelationTypes.REPLACE, - RelationTypes.THREAD, - ): - # Unknown relation type + if not isinstance(rel_type, str): return parent_id = relation.get("event_id") - if not parent_id: - # Invalid relation + if not isinstance(parent_id, str): return - aggregation_key = relation.get("key") + # Annotations have a key field. + aggregation_key = None + if rel_type == RelationTypes.ANNOTATION: + aggregation_key = relation.get("key") self.db_pool.simple_insert_txn( txn, diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index ae3a8a63e4..c88fd35e7f 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1,4 +1,4 @@ -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -171,8 +171,14 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): self._purged_chain_cover_index, ) + # The event_thread_relation background update was replaced with the + # event_arbitrary_relations one, which handles any relation to avoid + # needed to potentially crawl the entire events table in the future. + self.db_pool.updates.register_noop_background_update("event_thread_relation") + self.db_pool.updates.register_background_update_handler( - "event_thread_relation", self._event_thread_relation + "event_arbitrary_relations", + self._event_arbitrary_relations, ) ################################################################################ @@ -1099,23 +1105,27 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): return result - async def _event_thread_relation(self, progress: JsonDict, batch_size: int) -> int: - """Background update handler which will store thread relations for existing events.""" + async def _event_arbitrary_relations( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update handler which will store previously unknown relations for existing events.""" last_event_id = progress.get("last_event_id", "") - def _event_thread_relation_txn(txn: LoggingTransaction) -> int: + def _event_arbitrary_relations_txn(txn: LoggingTransaction) -> int: + # Fetch events and then filter based on whether the event has a + # relation or not. txn.execute( """ SELECT event_id, json FROM event_json - LEFT JOIN event_relations USING (event_id) - WHERE event_id > ? AND event_relations.event_id IS NULL + WHERE event_id > ? ORDER BY event_id LIMIT ? """, (last_event_id, batch_size), ) results = list(txn) - missing_thread_relations = [] + # (event_id, parent_id, rel_type) for each relation + relations_to_insert: List[Tuple[str, str, str]] = [] for (event_id, event_json_raw) in results: try: event_json = db_to_json(event_json_raw) @@ -1127,48 +1137,70 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ) continue - # If there's no relation (or it is not a thread), skip! + # If there's no relation, skip! relates_to = event_json["content"].get("m.relates_to") if not relates_to or not isinstance(relates_to, dict): continue - if relates_to.get("rel_type") != RelationTypes.THREAD: + + # If the relation type or parent event ID is not a string, skip it. 
+ # + # Do not consider relation types that have existed for a long time, + # since they will already be listed in the `event_relations` table. + rel_type = relates_to.get("rel_type") + if not isinstance(rel_type, str) or rel_type in ( + RelationTypes.ANNOTATION, + RelationTypes.REFERENCE, + RelationTypes.REPLACE, + ): continue - # Get the parent ID. parent_id = relates_to.get("event_id") if not isinstance(parent_id, str): continue - missing_thread_relations.append((event_id, parent_id)) + relations_to_insert.append((event_id, parent_id, rel_type)) + + # Insert the missing data, note that we upsert here in case the event + # has already been processed. + if relations_to_insert: + self.db_pool.simple_upsert_many_txn( + txn=txn, + table="event_relations", + key_names=("event_id",), + key_values=[(r[0],) for r in relations_to_insert], + value_names=("relates_to_id", "relation_type"), + value_values=[r[1:] for r in relations_to_insert], + ) - # Insert the missing data. - self.db_pool.simple_insert_many_txn( - txn=txn, - table="event_relations", - values=[ - { - "event_id": event_id, - "relates_to_Id": parent_id, - "relation_type": RelationTypes.THREAD, - } - for event_id, parent_id in missing_thread_relations - ], - ) + # Iterate the parent IDs and invalidate caches. + for parent_id in {r[1] for r in relations_to_insert}: + cache_tuple = (parent_id,) + self._invalidate_cache_and_stream( + txn, self.get_relations_for_event, cache_tuple + ) + self._invalidate_cache_and_stream( + txn, self.get_aggregation_groups_for_event, cache_tuple + ) + self._invalidate_cache_and_stream( + txn, self.get_thread_summary, cache_tuple + ) if results: latest_event_id = results[-1][0] self.db_pool.updates._background_update_progress_txn( - txn, "event_thread_relation", {"last_event_id": latest_event_id} + txn, "event_arbitrary_relations", {"last_event_id": latest_event_id} ) return len(results) num_rows = await self.db_pool.runInteraction( - desc="event_thread_relation", func=_event_thread_relation_txn + desc="event_arbitrary_relations", func=_event_arbitrary_relations_txn ) if not num_rows: - await self.db_pool.updates._end_background_update("event_thread_relation") + await self.db_pool.updates._end_background_update( + "event_arbitrary_relations" + ) return num_rows diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index c6bf316d5b..4cefc0a07e 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -15,14 +15,18 @@ import logging import threading from typing import ( + TYPE_CHECKING, + Any, Collection, Container, Dict, Iterable, List, + NoReturn, Optional, Set, Tuple, + cast, overload, ) @@ -38,6 +42,7 @@ from synapse.api.errors import NotFoundError, SynapseError from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, + RoomVersion, RoomVersions, ) from synapse.events import EventBase, make_event_from_dict @@ -56,10 +61,18 @@ from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.engines import PostgresEngine -from synapse.storage.types 
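The `_event_arbitrary_relations` background update above scans `event_json` in event-ID order, upserts any newly discovered relations, and records the last event ID it reached so the next batch resumes from there (the real update additionally skips relation types that were already tracked). A standalone sketch of that cursor-and-batch pattern, with in-memory dicts standing in for the real tables:

```python
from typing import Dict, List, Tuple

event_json: Dict[str, dict] = {
    "$a": {"content": {}},
    "$b": {"content": {"m.relates_to": {"rel_type": "io.example.custom", "event_id": "$a"}}},
    "$c": {"content": {"m.relates_to": {"rel_type": "m.thread", "event_id": "$a"}}},
}
progress = {"last_event_id": ""}

def run_batch(batch_size: int) -> int:
    last = progress["last_event_id"]
    # Fetch at most `batch_size` rows after the cursor, in event-ID order.
    rows: List[Tuple[str, dict]] = sorted(
        (eid, ev) for eid, ev in event_json.items() if eid > last
    )[:batch_size]

    relations_to_insert: List[Tuple[str, str, str]] = []
    for event_id, ev in rows:
        rel = ev["content"].get("m.relates_to") or {}
        rel_type, parent_id = rel.get("rel_type"), rel.get("event_id")
        if isinstance(rel_type, str) and isinstance(parent_id, str):
            relations_to_insert.append((event_id, parent_id, rel_type))

    print("upserting", relations_to_insert)

    if rows:
        progress["last_event_id"] = rows[-1][0]
    # Returning 0 tells the scheduler the background update has finished.
    return len(rows)

while run_batch(batch_size=2):
    pass
```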
import Connection -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.types import Cursor +from synapse.storage.util.id_generators import ( + AbstractStreamIdTracker, + MultiWriterIdGenerator, + StreamIdGenerator, +) from synapse.storage.util.sequence import build_sequence_generator from synapse.types import JsonDict, get_domain_from_id from synapse.util import unwrapFirstError @@ -69,10 +82,13 @@ from synapse.util.caches.lrucache import LruCache from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) -# These values are used in the `enqueus_event` and `_do_fetch` methods to +# These values are used in the `enqueue_event` and `_fetch_loop` methods to # control how we batch/bulk fetch events from the database. # The values are plucked out of thing air to make initial sync run faster # on jki.re @@ -89,7 +105,7 @@ event_fetch_ongoing_gauge = Gauge( @attr.s(slots=True, auto_attribs=True) -class _EventCacheEntry: +class EventCacheEntry: event: EventBase redacted_event: Optional[EventBase] @@ -129,7 +145,7 @@ class _EventRow: json: str internal_metadata: str format_version: Optional[int] - room_version_id: Optional[int] + room_version_id: Optional[str] rejected_reason: Optional[str] redactions: List[str] outlier: bool @@ -153,9 +169,16 @@ class EventsWorkerStore(SQLBaseStore): # options controlling this. USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING = True - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) + self._stream_id_gen: AbstractStreamIdTracker + self._backfill_id_gen: AbstractStreamIdTracker if isinstance(database.engine, PostgresEngine): # If we're using Postgres than we can use `MultiWriterIdGenerator` # regardless of whether this process writes to the streams or not. @@ -214,7 +237,7 @@ class EventsWorkerStore(SQLBaseStore): 5 * 60 * 1000, ) - self._get_event_cache = LruCache( + self._get_event_cache: LruCache[Tuple[str], EventCacheEntry] = LruCache( cache_name="*getEvent*", max_size=hs.config.caches.event_cache_size, ) @@ -223,19 +246,21 @@ class EventsWorkerStore(SQLBaseStore): # ID to cache entry. Note that the returned dict may not have the # requested event in it if the event isn't in the DB. self._current_event_fetches: Dict[ - str, ObservableDeferred[Dict[str, _EventCacheEntry]] + str, ObservableDeferred[Dict[str, EventCacheEntry]] ] = {} self._event_fetch_lock = threading.Condition() - self._event_fetch_list = [] + self._event_fetch_list: List[ + Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"] + ] = [] self._event_fetch_ongoing = 0 event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) # We define this sequence here so that it can be referenced from both # the DataStore and PersistEventStore. 
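The newly typed `_current_event_fetches` map is what lets concurrent requests for the same event share a single database fetch: the first caller registers a deferred, later callers observe it. Synapse does this with Twisted `ObservableDeferred`s; the snippet below is only an asyncio analogue of the idea, with illustrative names.

```python
import asyncio
from typing import Dict

_in_flight: Dict[str, "asyncio.Future[str]"] = {}

async def fetch_event(event_id: str) -> str:
    existing = _in_flight.get(event_id)
    if existing is not None:
        return await existing          # piggy-back on the in-flight fetch

    fut: "asyncio.Future[str]" = asyncio.get_running_loop().create_future()
    _in_flight[event_id] = fut
    try:
        print("hitting the database for", event_id)
        await asyncio.sleep(0.01)      # pretend this is the DB round-trip
        fut.set_result(f"event {event_id}")
        return fut.result()
    finally:
        # Always deregister, so a failed fetch can be retried later.
        _in_flight.pop(event_id, None)

async def main() -> None:
    results = await asyncio.gather(*(fetch_event("$e") for _ in range(3)))
    print(results)   # one DB hit, three identical results

asyncio.run(main())
```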
- def get_chain_id_txn(txn): + def get_chain_id_txn(txn: Cursor) -> int: txn.execute("SELECT COALESCE(max(chain_id), 0) FROM event_auth_chains") - return txn.fetchone()[0] + return cast(Tuple[int], txn.fetchone())[0] self.event_chain_id_gen = build_sequence_generator( db_conn, @@ -246,7 +271,13 @@ class EventsWorkerStore(SQLBaseStore): id_column="chain_id", ) - def process_replication_rows(self, stream_name, instance_name, token, rows): + def process_replication_rows( + self, + stream_name: str, + instance_name: str, + token: int, + rows: Iterable[Any], + ) -> None: if stream_name == EventsStream.NAME: self._stream_id_gen.advance(instance_name, token) elif stream_name == BackfillStream.NAME: @@ -280,10 +311,10 @@ class EventsWorkerStore(SQLBaseStore): self, event_id: str, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT, - get_prev_content: bool = False, - allow_rejected: bool = False, - allow_none: Literal[False] = False, - check_room_id: Optional[str] = None, + get_prev_content: bool = ..., + allow_rejected: bool = ..., + allow_none: Literal[False] = ..., + check_room_id: Optional[str] = ..., ) -> EventBase: ... @@ -292,10 +323,10 @@ class EventsWorkerStore(SQLBaseStore): self, event_id: str, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT, - get_prev_content: bool = False, - allow_rejected: bool = False, - allow_none: Literal[True] = False, - check_room_id: Optional[str] = None, + get_prev_content: bool = ..., + allow_rejected: bool = ..., + allow_none: Literal[True] = ..., + check_room_id: Optional[str] = ..., ) -> Optional[EventBase]: ... @@ -357,7 +388,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_events( self, - event_ids: Iterable[str], + event_ids: Collection[str], redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT, get_prev_content: bool = False, allow_rejected: bool = False, @@ -544,7 +575,7 @@ class EventsWorkerStore(SQLBaseStore): async def _get_events_from_cache_or_db( self, event_ids: Iterable[str], allow_rejected: bool = False - ) -> Dict[str, _EventCacheEntry]: + ) -> Dict[str, EventCacheEntry]: """Fetch a bunch of events from the cache or the database. If events are pulled from the database, they will be cached for future lookups. @@ -578,7 +609,7 @@ class EventsWorkerStore(SQLBaseStore): # same dict into itself N times). already_fetching_ids: Set[str] = set() already_fetching_deferreds: Set[ - ObservableDeferred[Dict[str, _EventCacheEntry]] + ObservableDeferred[Dict[str, EventCacheEntry]] ] = set() for event_id in missing_events_ids: @@ -601,8 +632,8 @@ class EventsWorkerStore(SQLBaseStore): # function returning more events than requested, but that can happen # already due to `_get_events_from_db`). fetching_deferred: ObservableDeferred[ - Dict[str, _EventCacheEntry] - ] = ObservableDeferred(defer.Deferred()) + Dict[str, EventCacheEntry] + ] = ObservableDeferred(defer.Deferred(), consumeErrors=True) for event_id in missing_events_ids: self._current_event_fetches[event_id] = fetching_deferred @@ -658,12 +689,12 @@ class EventsWorkerStore(SQLBaseStore): return event_entry_map - def _invalidate_get_event_cache(self, event_id): + def _invalidate_get_event_cache(self, event_id: str) -> None: self._get_event_cache.invalidate((event_id,)) def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True - ) -> Dict[str, _EventCacheEntry]: + ) -> Dict[str, EventCacheEntry]: """Fetch events from the caches. May return rejected events. 
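The `get_event` overloads above switch their defaults to `...`, the conventional way to say "same default as the implementation" inside an overload signature; combined with `Literal[False]` / `Literal[True]` on `allow_none`, mypy can pick the right return type at each call site. A minimal standalone version of the pattern (names are illustrative):

```python
from typing import Literal, Optional, overload

@overload
def get_thing(key: str, allow_none: Literal[False] = ...) -> str: ...
@overload
def get_thing(key: str, allow_none: Literal[True] = ...) -> Optional[str]: ...

def get_thing(key: str, allow_none: bool = False) -> Optional[str]:
    value = {"a": "alpha"}.get(key)
    if value is None and not allow_none:
        raise KeyError(key)
    return value

definitely_str = get_thing("a")              # mypy infers: str
maybe_str = get_thing("b", allow_none=True)  # mypy infers: Optional[str]
print(definitely_str, maybe_str)
```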
@@ -736,38 +767,123 @@ class EventsWorkerStore(SQLBaseStore): for e in state_to_include.values() ] - def _do_fetch(self, conn: Connection) -> None: + def _maybe_start_fetch_thread(self) -> None: + """Starts an event fetch thread if we are not yet at the maximum number.""" + with self._event_fetch_lock: + if ( + self._event_fetch_list + and self._event_fetch_ongoing < EVENT_QUEUE_THREADS + ): + self._event_fetch_ongoing += 1 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) + # `_event_fetch_ongoing` is decremented in `_fetch_thread`. + should_start = True + else: + should_start = False + + if should_start: + run_as_background_process("fetch_events", self._fetch_thread) + + async def _fetch_thread(self) -> None: + """Services requests for events from `_event_fetch_list`.""" + exc = None + try: + await self.db_pool.runWithConnection(self._fetch_loop) + except BaseException as e: + exc = e + raise + finally: + should_restart = False + event_fetches_to_fail = [] + with self._event_fetch_lock: + self._event_fetch_ongoing -= 1 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) + + # There may still be work remaining in `_event_fetch_list` if we + # failed, or it was added in between us deciding to exit and + # decrementing `_event_fetch_ongoing`. + if self._event_fetch_list: + if exc is None: + # We decided to exit, but then some more work was added + # before `_event_fetch_ongoing` was decremented. + # If a new event fetch thread was not started, we should + # restart ourselves since the remaining event fetch threads + # may take a while to get around to the new work. + # + # Unfortunately it is not possible to tell whether a new + # event fetch thread was started, so we restart + # unconditionally. If we are unlucky, we will end up with + # an idle fetch thread, but it will time out after + # `EVENT_QUEUE_ITERATIONS * EVENT_QUEUE_TIMEOUT_S` seconds + # in any case. + # + # Note that multiple fetch threads may run down this path at + # the same time. + should_restart = True + elif isinstance(exc, Exception): + if self._event_fetch_ongoing == 0: + # We were the last remaining fetcher and failed. + # Fail any outstanding fetches since no one else will + # handle them. + event_fetches_to_fail = self._event_fetch_list + self._event_fetch_list = [] + else: + # We weren't the last remaining fetcher, so another + # fetcher will pick up the work. This will either happen + # after their existing work, however long that takes, + # or after at most `EVENT_QUEUE_TIMEOUT_S` seconds if + # they are idle. + pass + else: + # The exception is a `SystemExit`, `KeyboardInterrupt` or + # `GeneratorExit`. Don't try to do anything clever here. + pass + + if should_restart: + # We exited cleanly but noticed more work. + self._maybe_start_fetch_thread() + + if event_fetches_to_fail: + # We were the last remaining fetcher and failed. + # Fail any outstanding fetches since no one else will handle them. + assert exc is not None + with PreserveLoggingContext(): + for _, deferred in event_fetches_to_fail: + deferred.errback(exc) + + def _fetch_loop(self, conn: LoggingDatabaseConnection) -> None: """Takes a database connection and waits for requests for events from the _event_fetch_list queue. 
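`_maybe_start_fetch_thread` and `_fetch_thread` above separate "decide whether to start a fetcher" from "run the fetcher and clean up", including the race where new work arrives just as a fetcher is exiting. A plain-threads sketch of the same shape; the names and the cap of 3 are illustrative, and Synapse runs the real loop as a background process holding a database connection rather than a bare thread.

```python
import threading
from typing import List

MAX_FETCHERS = 3
_lock = threading.Condition()
_work: List[str] = []
_ongoing = 0

def _maybe_start_fetcher() -> None:
    global _ongoing
    with _lock:
        should_start = bool(_work) and _ongoing < MAX_FETCHERS
        if should_start:
            _ongoing += 1  # decremented by the fetcher itself on exit
    if should_start:
        threading.Thread(target=_fetcher).start()

def _fetcher() -> None:
    global _ongoing
    try:
        while True:
            with _lock:
                if not _work:
                    return
                item = _work.pop()
            print("fetching", item)
    finally:
        with _lock:
            _ongoing -= 1
            work_remains = bool(_work)
        # Work may have been queued between deciding to exit and decrementing
        # the counter; restart (possibly redundantly) so it is not stranded.
        if work_remains:
            _maybe_start_fetcher()

with _lock:
    _work.extend(["$e1", "$e2", "$e3"])
_maybe_start_fetcher()
```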
""" - try: - i = 0 - while True: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if ( - not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING - or single_threaded - or i > EVENT_QUEUE_ITERATIONS - ): - break - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 + i = 0 + while True: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + # There are no requests waiting. If we haven't yet reached the + # maximum iteration limit, wait for some more requests to turn up. + # Otherwise, bail out. + single_threaded = self.database_engine.single_threaded + if ( + not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING + or single_threaded + or i > EVENT_QUEUE_ITERATIONS + ): + return + + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 - self._fetch_event_list(conn, event_list) - finally: - self._event_fetch_ongoing -= 1 - event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) + self._fetch_event_list(conn, event_list) def _fetch_event_list( - self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]] + self, + conn: LoggingDatabaseConnection, + event_list: List[Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"]], ) -> None: """Handle a load of requests from the _event_fetch_list queue @@ -794,7 +910,7 @@ class EventsWorkerStore(SQLBaseStore): ) # We only want to resolve deferreds from the main thread - def fire(): + def fire() -> None: for _, d in event_list: d.callback(row_dict) @@ -804,18 +920,16 @@ class EventsWorkerStore(SQLBaseStore): logger.exception("do_fetch") # We only want to resolve deferreds from the main thread - def fire(evs, exc): - for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(exc) + def fire_errback(exc: Exception) -> None: + for _, d in event_list: + d.errback(exc) with PreserveLoggingContext(): - self.hs.get_reactor().callFromThread(fire, event_list, e) + self.hs.get_reactor().callFromThread(fire_errback, e) async def _get_events_from_db( - self, event_ids: Iterable[str] - ) -> Dict[str, _EventCacheEntry]: + self, event_ids: Collection[str] + ) -> Dict[str, EventCacheEntry]: """Fetch a bunch of events from the database. May return rejected events. @@ -831,29 +945,29 @@ class EventsWorkerStore(SQLBaseStore): map from event id to result. May return extra events which weren't asked for. 
""" - fetched_events = {} + fetched_event_ids: Set[str] = set() + fetched_events: Dict[str, _EventRow] = {} events_to_fetch = event_ids while events_to_fetch: row_map = await self._enqueue_events(events_to_fetch) # we need to recursively fetch any redactions of those events - redaction_ids = set() + redaction_ids: Set[str] = set() for event_id in events_to_fetch: row = row_map.get(event_id) - fetched_events[event_id] = row + fetched_event_ids.add(event_id) if row: + fetched_events[event_id] = row redaction_ids.update(row.redactions) - events_to_fetch = redaction_ids.difference(fetched_events.keys()) + events_to_fetch = redaction_ids.difference(fetched_event_ids) if events_to_fetch: logger.debug("Also fetching redaction events %s", events_to_fetch) # build a map from event_id to EventBase - event_map = {} + event_map: Dict[str, EventBase] = {} for event_id, row in fetched_events.items(): - if not row: - continue assert row.event_id == event_id rejected_reason = row.rejected_reason @@ -881,6 +995,7 @@ class EventsWorkerStore(SQLBaseStore): room_version_id = row.room_version_id + room_version: Optional[RoomVersion] if not room_version_id: # this should only happen for out-of-band membership events which # arrived before #6983 landed. For all other events, we should have @@ -951,14 +1066,14 @@ class EventsWorkerStore(SQLBaseStore): # finally, we can decide whether each one needs redacting, and build # the cache entries. - result_map = {} + result_map: Dict[str, EventCacheEntry] = {} for event_id, original_ev in event_map.items(): redactions = fetched_events[event_id].redactions redacted_event = self._maybe_redact_event_row( original_ev, redactions, event_map ) - cache_entry = _EventCacheEntry( + cache_entry = EventCacheEntry( event=original_ev, redacted_event=redacted_event ) @@ -967,7 +1082,7 @@ class EventsWorkerStore(SQLBaseStore): return result_map - async def _enqueue_events(self, events: Iterable[str]) -> Dict[str, _EventRow]: + async def _enqueue_events(self, events: Collection[str]) -> Dict[str, _EventRow]: """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. @@ -980,23 +1095,12 @@ class EventsWorkerStore(SQLBaseStore): that weren't requested. """ - events_d = defer.Deferred() + events_d: "defer.Deferred[Dict[str, _EventRow]]" = defer.Deferred() with self._event_fetch_lock: self._event_fetch_list.append((events, events_d)) - self._event_fetch_lock.notify() - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) - should_start = True - else: - should_start = False - - if should_start: - run_as_background_process( - "fetch_events", self.db_pool.runWithConnection, self._do_fetch - ) + self._maybe_start_fetch_thread() logger.debug("Loading %d events: %s", len(events), events) with PreserveLoggingContext(): @@ -1146,7 +1250,7 @@ class EventsWorkerStore(SQLBaseStore): # no valid redaction found for this event return None - async def have_events_in_timeline(self, event_ids): + async def have_events_in_timeline(self, event_ids: Iterable[str]) -> Set[str]: """Given a list of event ids, check if we have already processed and stored them as non outliers. """ @@ -1175,7 +1279,7 @@ class EventsWorkerStore(SQLBaseStore): event_ids: events we are looking for Returns: - set[str]: The events we have already seen. + The set of events we have already seen. 
""" res = await self._have_seen_events_dict( (room_id, event_id) for event_id in event_ids @@ -1198,7 +1302,9 @@ class EventsWorkerStore(SQLBaseStore): } results = {x: True for x in cache_results} - def have_seen_events_txn(txn, chunk: Tuple[Tuple[str, str], ...]): + def have_seen_events_txn( + txn: LoggingTransaction, chunk: Tuple[Tuple[str, str], ...] + ) -> None: # we deliberately do *not* query the database for room_id, to make the # query an index-only lookup on `events_event_id_key`. # @@ -1224,12 +1330,14 @@ class EventsWorkerStore(SQLBaseStore): return results @cached(max_entries=100000, tree=True) - async def have_seen_event(self, room_id: str, event_id: str): + async def have_seen_event(self, room_id: str, event_id: str) -> NoReturn: # this only exists for the benefit of the @cachedList descriptor on # _have_seen_events_dict raise NotImplementedError() - def _get_current_state_event_counts_txn(self, txn, room_id): + def _get_current_state_event_counts_txn( + self, txn: LoggingTransaction, room_id: str + ) -> int: """ See get_current_state_event_counts. """ @@ -1254,7 +1362,7 @@ class EventsWorkerStore(SQLBaseStore): room_id, ) - async def get_room_complexity(self, room_id): + async def get_room_complexity(self, room_id: str) -> Dict[str, float]: """ Get a rough approximation of the complexity of the room. This is used by remote servers to decide whether they wish to join the room or not. @@ -1262,10 +1370,10 @@ class EventsWorkerStore(SQLBaseStore): more resources. Args: - room_id (str) + room_id: The room ID to query. Returns: - dict[str:int] of complexity version to complexity. + dict[str:float] of complexity version to complexity. """ state_events = await self.get_current_state_event_counts(room_id) @@ -1275,13 +1383,13 @@ class EventsWorkerStore(SQLBaseStore): return {"v1": complexity_v1} - def get_current_events_token(self): + def get_current_events_token(self) -> int: """The current maximum token that events have reached""" return self._stream_id_gen.get_current_token() async def get_all_new_forward_event_rows( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> List[Tuple]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: """Returns new events, for the Events replication stream Args: @@ -1295,7 +1403,9 @@ class EventsWorkerStore(SQLBaseStore): EventsStreamRow. """ - def get_all_new_forward_event_rows(txn): + def get_all_new_forward_event_rows( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," " state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" @@ -1311,7 +1421,9 @@ class EventsWorkerStore(SQLBaseStore): " LIMIT ?" ) txn.execute(sql, (last_id, current_id, instance_name, limit)) - return txn.fetchall() + return cast( + List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + ) return await self.db_pool.runInteraction( "get_all_new_forward_event_rows", get_all_new_forward_event_rows @@ -1319,7 +1431,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_ex_outlier_stream_rows( self, instance_name: str, last_id: int, current_id: int - ) -> List[Tuple]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: """Returns de-outliered events, for the Events replication stream Args: @@ -1332,7 +1444,9 @@ class EventsWorkerStore(SQLBaseStore): EventsStreamRow. 
""" - def get_ex_outlier_stream_rows_txn(txn): + def get_ex_outlier_stream_rows_txn( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," " state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" @@ -1350,7 +1464,9 @@ class EventsWorkerStore(SQLBaseStore): ) txn.execute(sql, (last_id, current_id, instance_name)) - return txn.fetchall() + return cast( + List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + ) return await self.db_pool.runInteraction( "get_ex_outlier_stream_rows", get_ex_outlier_stream_rows_txn @@ -1358,7 +1474,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_all_new_backfill_event_rows( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> Tuple[List[Tuple[int, list]], int, bool]: + ) -> Tuple[List[Tuple[int, Tuple[str, str, str, str, str, str]]], int, bool]: """Get updates for backfill replication stream, including all new backfilled events and events that have gone from being outliers to not. @@ -1386,7 +1502,9 @@ class EventsWorkerStore(SQLBaseStore): if last_id == current_id: return [], current_id, False - def get_all_new_backfill_event_rows(txn): + def get_all_new_backfill_event_rows( + txn: LoggingTransaction, + ) -> Tuple[List[Tuple[int, Tuple[str, str, str, str, str, str]]], int, bool]: sql = ( "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," " state_key, redacts, relates_to_id" @@ -1400,7 +1518,15 @@ class EventsWorkerStore(SQLBaseStore): " LIMIT ?" ) txn.execute(sql, (-last_id, -current_id, instance_name, limit)) - new_event_updates = [(row[0], row[1:]) for row in txn] + new_event_updates: List[ + Tuple[int, Tuple[str, str, str, str, str, str]] + ] = [] + row: Tuple[int, str, str, str, str, str, str] + # Type safety: iterating over `txn` yields `Tuple`, i.e. + # `Tuple[Any, ...]` of arbitrary length. Mypy detects assigning a + # variadic tuple to a fixed length tuple and flags it up as an error. + for row in txn: # type: ignore[assignment] + new_event_updates.append((row[0], row[1:])) limited = False if len(new_event_updates) == limit: @@ -1423,7 +1549,11 @@ class EventsWorkerStore(SQLBaseStore): " ORDER BY event_stream_ordering DESC" ) txn.execute(sql, (-last_id, -upper_bound, instance_name)) - new_event_updates.extend((row[0], row[1:]) for row in txn) + # Type safety: iterating over `txn` yields `Tuple`, i.e. + # `Tuple[Any, ...]` of arbitrary length. Mypy detects assigning a + # variadic tuple to a fixed length tuple and flags it up as an error. + for row in txn: # type: ignore[assignment] + new_event_updates.append((row[0], row[1:])) if len(new_event_updates) >= limit: upper_bound = new_event_updates[-1][0] @@ -1437,7 +1567,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_all_updated_current_state_deltas( self, instance_name: str, from_token: int, to_token: int, target_row_count: int - ) -> Tuple[List[Tuple], int, bool]: + ) -> Tuple[List[Tuple[int, str, str, str, str]], int, bool]: """Fetch updates from current_state_delta_stream Args: @@ -1457,7 +1587,9 @@ class EventsWorkerStore(SQLBaseStore): * `limited` is whether there are more updates to fetch. 
""" - def get_all_updated_current_state_deltas_txn(txn): + def get_all_updated_current_state_deltas_txn( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str, str, str]]: sql = """ SELECT stream_id, room_id, type, state_key, event_id FROM current_state_delta_stream @@ -1466,21 +1598,23 @@ class EventsWorkerStore(SQLBaseStore): ORDER BY stream_id ASC LIMIT ? """ txn.execute(sql, (from_token, to_token, instance_name, target_row_count)) - return txn.fetchall() + return cast(List[Tuple[int, str, str, str, str]], txn.fetchall()) - def get_deltas_for_stream_id_txn(txn, stream_id): + def get_deltas_for_stream_id_txn( + txn: LoggingTransaction, stream_id: int + ) -> List[Tuple[int, str, str, str, str]]: sql = """ SELECT stream_id, room_id, type, state_key, event_id FROM current_state_delta_stream WHERE stream_id = ? """ txn.execute(sql, [stream_id]) - return txn.fetchall() + return cast(List[Tuple[int, str, str, str, str]], txn.fetchall()) # we need to make sure that, for every stream id in the results, we get *all* # the rows with that stream id. - rows: List[Tuple] = await self.db_pool.runInteraction( + rows: List[Tuple[int, str, str, str, str]] = await self.db_pool.runInteraction( "get_all_updated_current_state_deltas", get_all_updated_current_state_deltas_txn, ) @@ -1509,14 +1643,14 @@ class EventsWorkerStore(SQLBaseStore): return rows, to_token, True - async def is_event_after(self, event_id1, event_id2): + async def is_event_after(self, event_id1: str, event_id2: str) -> bool: """Returns True if event_id1 is after event_id2 in the stream""" to_1, so_1 = await self.get_event_ordering(event_id1) to_2, so_2 = await self.get_event_ordering(event_id2) return (to_1, so_1) > (to_2, so_2) @cached(max_entries=5000) - async def get_event_ordering(self, event_id): + async def get_event_ordering(self, event_id: str) -> Tuple[int, int]: res = await self.db_pool.simple_select_one( table="events", retcols=["topological_ordering", "stream_ordering"], @@ -1539,7 +1673,9 @@ class EventsWorkerStore(SQLBaseStore): None otherwise. """ - def get_next_event_to_expire_txn(txn): + def get_next_event_to_expire_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, int]]: txn.execute( """ SELECT event_id, expiry_ts FROM event_expiry @@ -1547,7 +1683,7 @@ class EventsWorkerStore(SQLBaseStore): """ ) - return txn.fetchone() + return cast(Optional[Tuple[str, int]], txn.fetchone()) return await self.db_pool.runInteraction( desc="get_next_event_to_expire", func=get_next_event_to_expire_txn @@ -1611,10 +1747,10 @@ class EventsWorkerStore(SQLBaseStore): return mapping @wrap_as_background_process("_cleanup_old_transaction_ids") - async def _cleanup_old_transaction_ids(self): + async def _cleanup_old_transaction_ids(self) -> None: """Cleans out transaction id mappings older than 24hrs.""" - def _cleanup_old_transaction_ids_txn(txn): + def _cleanup_old_transaction_ids_txn(txn: LoggingTransaction) -> None: sql = """ DELETE FROM event_txn_id WHERE inserted_ts < ? 
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index fa782023d4..3b63267395 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -28,7 +28,10 @@ from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.roommember import RoomMemberWorkerStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException -from synapse.storage.util.id_generators import StreamIdGenerator +from synapse.storage.util.id_generators import ( + AbstractStreamIdTracker, + StreamIdGenerator, +) from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -82,9 +85,9 @@ class PushRulesWorkerStore( super().__init__(database, db_conn, hs) if hs.config.worker.worker_app is None: - self._push_rules_stream_id_gen: Union[ - StreamIdGenerator, SlavedIdTracker - ] = StreamIdGenerator(db_conn, "push_rules_stream", "stream_id") + self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator( + db_conn, "push_rules_stream", "stream_id" + ) else: self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id" diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 5e55440570..e1ddf06916 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -84,28 +84,37 @@ class TokenLookupResult: return self.user_id -@attr.s(frozen=True, slots=True) +@attr.s(auto_attribs=True, frozen=True, slots=True) class RefreshTokenLookupResult: """Result of looking up a refresh token.""" - user_id = attr.ib(type=str) + user_id: str """The user this token belongs to.""" - device_id = attr.ib(type=str) + device_id: str """The device associated with this refresh token.""" - token_id = attr.ib(type=int) + token_id: int """The ID of this refresh token.""" - next_token_id = attr.ib(type=Optional[int]) + next_token_id: Optional[int] """The ID of the refresh token which replaced this one.""" - has_next_refresh_token_been_refreshed = attr.ib(type=bool) + has_next_refresh_token_been_refreshed: bool """True if the next refresh token was used for another refresh.""" - has_next_access_token_been_used = attr.ib(type=bool) + has_next_access_token_been_used: bool """True if the next access token was already used at least once.""" + expiry_ts: Optional[int] + """The time at which the refresh token expires and can not be used. + If None, the refresh token doesn't expire.""" + + ultimate_session_expiry_ts: Optional[int] + """The time at which the session comes to an end and can no longer be + refreshed. 
+ If None, the session can be refreshed indefinitely.""" + class RegistrationWorkerStore(CacheInvalidationWorkerStore): def __init__( @@ -1198,8 +1207,9 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): expiration_ts = now_ms + self._account_validity_period if use_delta: + assert self._account_validity_startup_job_max_delta is not None expiration_ts = random.randrange( - expiration_ts - self._account_validity_startup_job_max_delta, + int(expiration_ts - self._account_validity_startup_job_max_delta), expiration_ts, ) @@ -1625,8 +1635,10 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): rt.user_id, rt.device_id, rt.next_token_id, - (nrt.next_token_id IS NOT NULL) has_next_refresh_token_been_refreshed, - at.used has_next_access_token_been_used + (nrt.next_token_id IS NOT NULL) AS has_next_refresh_token_been_refreshed, + at.used AS has_next_access_token_been_used, + rt.expiry_ts, + rt.ultimate_session_expiry_ts FROM refresh_tokens rt LEFT JOIN refresh_tokens nrt ON rt.next_token_id = nrt.id LEFT JOIN access_tokens at ON at.refresh_token_id = nrt.id @@ -1647,6 +1659,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): has_next_refresh_token_been_refreshed=row[4], # This column is nullable, ensure it's a boolean has_next_access_token_been_used=(row[5] or False), + expiry_ts=row[6], + ultimate_session_expiry_ts=row[7], ) return await self.db_pool.runInteraction( @@ -1728,11 +1742,11 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): ) self.db_pool.updates.register_background_update_handler( - "user_threepids_grandfather", self._bg_user_threepids_grandfather + "users_set_deactivated_flag", self._background_update_set_deactivated_flag ) - self.db_pool.updates.register_background_update_handler( - "users_set_deactivated_flag", self._background_update_set_deactivated_flag + self.db_pool.updates.register_noop_background_update( + "user_threepids_grandfather" ) self.db_pool.updates.register_background_index_update( @@ -1805,35 +1819,6 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): return nb_processed - async def _bg_user_threepids_grandfather(self, progress, batch_size): - """We now track which identity servers a user binds their 3PID to, so - we need to handle the case of existing bindings where we didn't track - this. - - We do this by grandfathering in existing user threepids assuming that - they used one of the server configured trusted identity servers. - """ - id_servers = set(self.config.registration.trusted_third_party_id_servers) - - def _bg_user_threepids_grandfather_txn(txn): - sql = """ - INSERT INTO user_threepid_id_server - (user_id, medium, address, id_server) - SELECT user_id, medium, address, ? - FROM user_threepids - """ - - txn.execute_batch(sql, [(id_server,) for id_server in id_servers]) - - if id_servers: - await self.db_pool.runInteraction( - "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn - ) - - await self.db_pool.updates._end_background_update("user_threepids_grandfather") - - return 1 - async def set_user_deactivated_status( self, user_id: str, deactivated: bool ) -> None: @@ -1943,6 +1928,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): user_id: str, token: str, device_id: Optional[str], + expiry_ts: Optional[int], + ultimate_session_expiry_ts: Optional[int], ) -> int: """Adds a refresh token for the given user. @@ -1950,6 +1937,13 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): user_id: The user ID. 
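The two new nullable refresh-token fields documented above are consumed elsewhere (in the auth handler, not in this file); the sketch below only illustrates the documented semantics and is not code from this patch. Timestamps are milliseconds since the epoch, matching the columns.

```python
import time
from typing import Optional

def can_refresh(
    expiry_ts: Optional[int],
    ultimate_session_expiry_ts: Optional[int],
    now_ms: Optional[int] = None,
) -> bool:
    now_ms = int(time.time() * 1000) if now_ms is None else now_ms
    if expiry_ts is not None and now_ms >= expiry_ts:
        return False  # the refresh token itself can no longer be used
    if ultimate_session_expiry_ts is not None and now_ms >= ultimate_session_expiry_ts:
        return False  # the session as a whole can no longer be extended
    return True

print(can_refresh(expiry_ts=None, ultimate_session_expiry_ts=None))  # True
print(can_refresh(expiry_ts=0, ultimate_session_expiry_ts=None))     # False
```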
token: The new access token to add. device_id: ID of the device to associate with the refresh token. + expiry_ts (milliseconds since the epoch): Time after which the + refresh token cannot be used. + If None, the refresh token never expires until it has been used. + ultimate_session_expiry_ts (milliseconds since the epoch): + Time at which the session will end and can not be extended any + further. + If None, the session can be refreshed indefinitely. Raises: StoreError if there was a problem adding this. Returns: @@ -1965,6 +1959,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): "device_id": device_id, "token": token, "next_token_id": None, + "expiry_ts": expiry_ts, + "ultimate_session_expiry_ts": ultimate_session_expiry_ts, }, desc="add_refresh_token_to_user", ) diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 907af10995..0a43acda07 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -132,6 +132,69 @@ class RelationsWorkerStore(SQLBaseStore): "get_recent_references_for_event", _get_recent_references_for_event_txn ) + async def event_includes_relation(self, event_id: str) -> bool: + """Check if the given event relates to another event. + + An event has a relation if it has a valid m.relates_to with a rel_type + and event_id in the content: + + { + "content": { + "m.relates_to": { + "rel_type": "m.replace", + "event_id": "$other_event_id" + } + } + } + + Args: + event_id: The event to check. + + Returns: + True if the event includes a valid relation. + """ + + result = await self.db_pool.simple_select_one_onecol( + table="event_relations", + keyvalues={"event_id": event_id}, + retcol="event_id", + allow_none=True, + desc="event_includes_relation", + ) + return result is not None + + async def event_is_target_of_relation(self, parent_id: str) -> bool: + """Check if the given event is the target of another event's relation. + + An event is the target of an event relation if it has a valid + m.relates_to with a rel_type and event_id pointing to parent_id in the + content: + + { + "content": { + "m.relates_to": { + "rel_type": "m.replace", + "event_id": "$parent_id" + } + } + } + + Args: + parent_id: The event to check. + + Returns: + True if the event is the target of another event's relation. 
+ """ + + result = await self.db_pool.simple_select_one_onecol( + table="event_relations", + keyvalues={"relates_to_id": parent_id}, + retcol="event_id", + allow_none=True, + desc="event_is_target_of_relation", + ) + return result is not None + @cached(tree=True) async def get_aggregation_groups_for_event( self, @@ -362,7 +425,7 @@ class RelationsWorkerStore(SQLBaseStore): %s; """ - def _get_if_event_has_relations(txn) -> List[str]: + def _get_if_events_have_relations(txn) -> List[str]: clauses: List[str] = [] clause, args = make_in_list_sql_clause( txn.database_engine, "relates_to_id", parent_ids @@ -387,7 +450,7 @@ class RelationsWorkerStore(SQLBaseStore): return [row[0] for row in txn] return await self.db_pool.runInteraction( - "get_if_event_has_relations", _get_if_event_has_relations + "get_if_events_have_relations", _get_if_events_have_relations ) async def has_user_annotated_event( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 17b398bb69..7d694d852d 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -397,6 +397,20 @@ class RoomWorkerStore(SQLBaseStore): desc="is_room_blocked", ) + async def room_is_blocked_by(self, room_id: str) -> Optional[str]: + """ + Function to retrieve user who has blocked the room. + user_id is non-nullable + It returns None if the room is not blocked. + """ + return await self.db_pool.simple_select_one_onecol( + table="blocked_rooms", + keyvalues={"room_id": room_id}, + retcol="user_id", + allow_none=True, + desc="room_is_blocked_by", + ) + async def get_rooms_paginate( self, start: int, @@ -1775,3 +1789,21 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): self.is_room_blocked, (room_id,), ) + + async def unblock_room(self, room_id: str) -> None: + """Remove the room from blocking list. + + Args: + room_id: Room to unblock + """ + await self.db_pool.simple_delete( + table="blocked_rooms", + keyvalues={"room_id": room_id}, + desc="unblock_room", + ) + await self.db_pool.runInteraction( + "block_room_invalidation", + self._invalidate_cache_and_stream, + self.is_room_blocked, + (room_id,), + ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 8b9c6adae2..e45adfcb55 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -131,24 +131,16 @@ def prepare_database( "config==None in prepare_database, but database is not empty" ) - # if it's a worker app, refuse to upgrade the database, to avoid multiple - # workers doing it at once. - if config.worker.worker_app is None: - _upgrade_existing_database( - cur, - version_info, - database_engine, - config, - databases=databases, - ) - elif version_info.current_version < SCHEMA_VERSION: - # If the DB is on an older version than we expect then we refuse - # to start the worker (as the main process needs to run first to - # update the schema). - raise UpgradeDatabaseException( - OUTDATED_SCHEMA_ON_WORKER_ERROR - % (SCHEMA_VERSION, version_info.current_version) - ) + # This should be run on all processes, master or worker. The master will + # apply the deltas, while workers will check if any outstanding deltas + # exist and raise an PrepareDatabaseException if they do. 
+ _upgrade_existing_database( + cur, + version_info, + database_engine, + config, + databases=databases, + ) else: logger.info("%r: Initialising new database", databases) @@ -358,6 +350,18 @@ def _upgrade_existing_database( is_worker = config and config.worker.worker_app is not None + # If the schema version needs to be updated, and we are on a worker, we immediately + # know to bail out as workers cannot update the database schema. Only one process + # must update the database at the time, therefore we delegate this task to the master. + if is_worker and current_schema_state.current_version < SCHEMA_VERSION: + # If the DB is on an older version than we expect then we refuse + # to start the worker (as the main process needs to run first to + # update the schema). + raise UpgradeDatabaseException( + OUTDATED_SCHEMA_ON_WORKER_ERROR + % (SCHEMA_VERSION, current_schema_state.current_version) + ) + if ( current_schema_state.compat_version is not None and current_schema_state.compat_version > SCHEMA_VERSION diff --git a/synapse/storage/schema/main/delta/65/06remove_deleted_devices_from_device_inbox.sql b/synapse/storage/schema/main/delta/65/06remove_deleted_devices_from_device_inbox.sql new file mode 100644 index 0000000000..82f6408b36 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/06remove_deleted_devices_from_device_inbox.sql @@ -0,0 +1,34 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Remove messages from the device_inbox table which were orphaned +-- when a device was deleted using Synapse earlier than 1.47.0. +-- This runs as background task, but may take a bit to finish. + +-- Remove any existing instances of this job running. It's OK to stop and restart this job, +-- as it's just deleting entries from a table - no progress will be lost. +-- +-- This is necessary due a similar migration running the job accidentally +-- being included in schema version 64 during v1.47.0rc1,rc2. If a +-- homeserver had updated from Synapse <=v1.45.0 (schema version <=64), +-- then they would have started running this background update already. +-- If that update was still running, then simply inserting it again would +-- cause an SQL failure. So we effectively do an "upsert" here instead. + +DELETE FROM background_updates WHERE update_name = 'remove_deleted_devices_from_device_inbox'; + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6506, 'remove_deleted_devices_from_device_inbox', '{}'); diff --git a/synapse/storage/schema/main/delta/65/02_thread_relations.sql b/synapse/storage/schema/main/delta/65/07_arbitrary_relations.sql index d60517f7b4..267b2cb539 100644 --- a/synapse/storage/schema/main/delta/65/02_thread_relations.sql +++ b/synapse/storage/schema/main/delta/65/07_arbitrary_relations.sql @@ -15,4 +15,4 @@ -- Check old events for thread relations. 
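The new delta above deliberately does a manual "delete then insert" on `background_updates` so it is safe even when an earlier release already queued the job. A sqlite3 sketch of why a bare INSERT could fail and how the two-statement form is safe to re-run; the table definition is a simplified stand-in.

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE background_updates ("
    " update_name TEXT PRIMARY KEY, ordering INTEGER, progress_json TEXT)"
)
# Simulate a homeserver that already queued the job under the old delta.
conn.execute(
    "INSERT INTO background_updates VALUES"
    " ('remove_deleted_devices_from_device_inbox', 6505, '{}')"
)

def apply_delta() -> None:
    conn.execute(
        "DELETE FROM background_updates"
        " WHERE update_name = 'remove_deleted_devices_from_device_inbox'"
    )
    conn.execute(
        "INSERT INTO background_updates (ordering, update_name, progress_json)"
        " VALUES (6506, 'remove_deleted_devices_from_device_inbox', '{}')"
    )

apply_delta()
apply_delta()  # safe to re-run; a bare INSERT would hit the uniqueness constraint
print(conn.execute("SELECT * FROM background_updates").fetchall())
```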
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (6502, 'event_thread_relation', '{}'); + (6507, 'event_arbitrary_relations', '{}'); diff --git a/synapse/storage/schema/main/delta/65/05remove_deleted_devices_from_device_inbox.sql b/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql index 076179123d..d79455c2ce 100644 --- a/synapse/storage/schema/main/delta/65/05remove_deleted_devices_from_device_inbox.sql +++ b/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql @@ -13,10 +13,6 @@ * limitations under the License. */ - --- Remove messages from the device_inbox table which were orphaned --- when a device was deleted using Synapse earlier than 1.47.0. --- This runs as background task, but may take a bit to finish. - +-- Background update to clear the inboxes of hidden and deleted devices. INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (6505, 'remove_deleted_devices_from_device_inbox', '{}'); + (6508, 'remove_dead_devices_from_device_inbox', '{}'); diff --git a/synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql b/synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql new file mode 100644 index 0000000000..bdc491c817 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql @@ -0,0 +1,28 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +ALTER TABLE refresh_tokens + -- We add an expiry_ts column (in milliseconds since the Epoch) to refresh tokens. + -- They may not be used after they have expired. + -- If null, then the refresh token's lifetime is unlimited. + ADD COLUMN expiry_ts BIGINT DEFAULT NULL; + +ALTER TABLE refresh_tokens + -- We also add an ultimate session expiry time (in milliseconds since the Epoch). + -- No matter how much the access and refresh tokens are refreshed, they cannot + -- be extended past this time. + -- If null, then the session length is unlimited. + ADD COLUMN ultimate_session_expiry_ts BIGINT DEFAULT NULL; diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index ac56bc9a05..4ff3013908 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -89,31 +89,77 @@ def _load_current_id( return (max if step > 0 else min)(current_id, step) -class AbstractStreamIdGenerator(metaclass=abc.ABCMeta): - @abc.abstractmethod - def get_next(self) -> AsyncContextManager[int]: - raise NotImplementedError() +class AbstractStreamIdTracker(metaclass=abc.ABCMeta): + """Tracks the "current" stream ID of a stream that may have multiple writers. + + Stream IDs are monotonically increasing or decreasing integers representing write + transactions. The "current" stream ID is the stream ID such that all transactions + with equal or smaller stream IDs have completed. 
Since transactions may complete out + of order, this is not the same as the stream ID of the last completed transaction. + + Completed transactions include both committed transactions and transactions that + have been rolled back. + """ @abc.abstractmethod - def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]: + def advance(self, instance_name: str, new_id: int) -> None: + """Advance the position of the named writer to the given ID, if greater + than existing entry. + """ raise NotImplementedError() @abc.abstractmethod def get_current_token(self) -> int: + """Returns the maximum stream id such that all stream ids less than or + equal to it have been successfully persisted. + + Returns: + The maximum stream id. + """ raise NotImplementedError() @abc.abstractmethod def get_current_token_for_writer(self, instance_name: str) -> int: + """Returns the position of the given writer. + + For streams with single writers this is equivalent to `get_current_token`. + """ + raise NotImplementedError() + + +class AbstractStreamIdGenerator(AbstractStreamIdTracker): + """Generates stream IDs for a stream that may have multiple writers. + + Each stream ID represents a write transaction, whose completion is tracked + so that the "current" stream ID of the stream can be determined. + + See `AbstractStreamIdTracker` for more details. + """ + + @abc.abstractmethod + def get_next(self) -> AsyncContextManager[int]: + """ + Usage: + async with stream_id_gen.get_next() as stream_id: + # ... persist event ... + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]: + """ + Usage: + async with stream_id_gen.get_next(n) as stream_ids: + # ... persist events ... + """ raise NotImplementedError() class StreamIdGenerator(AbstractStreamIdGenerator): - """Used to generate new stream ids when persisting events while keeping - track of which transactions have been completed. + """Generates and tracks stream IDs for a stream with a single writer. - This allows us to get the "current" stream id, i.e. the stream id such that - all ids less than or equal to it have completed. This handles the fact that - persistence of events can complete out of order. + This class must only be used when the current Synapse process is the sole + writer for a stream. Args: db_conn(connection): A database connection to use to fetch the @@ -157,12 +203,12 @@ class StreamIdGenerator(AbstractStreamIdGenerator): # The key and values are the same, but we never look at the values. self._unfinished_ids: OrderedDict[int, int] = OrderedDict() + def advance(self, instance_name: str, new_id: int) -> None: + # `StreamIdGenerator` should only be used when there is a single writer, + # so replication should never happen. + raise Exception("Replication is not supported by StreamIdGenerator") + def get_next(self) -> AsyncContextManager[int]: - """ - Usage: - async with stream_id_gen.get_next() as stream_id: - # ... persist event ... - """ with self._lock: self._current += self._step next_id = self._current @@ -180,11 +226,6 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return _AsyncCtxManagerWrapper(manager()) def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]: - """ - Usage: - async with stream_id_gen.get_next(n) as stream_ids: - # ... persist events ... 
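The split into `AbstractStreamIdTracker` and `AbstractStreamIdGenerator` above hinges on the "current token" semantics spelled out in the docstring. A toy single-writer illustration (not Synapse code) of why the current token can lag behind the newest allocated ID when writes complete out of order:

```python
from typing import Set

class ToyStreamIdGen:
    def __init__(self) -> None:
        self._last_allocated = 0
        self._unfinished: Set[int] = set()

    def allocate(self) -> int:
        self._last_allocated += 1
        self._unfinished.add(self._last_allocated)
        return self._last_allocated

    def mark_finished(self, stream_id: int) -> None:
        self._unfinished.discard(stream_id)

    def get_current_token(self) -> int:
        # Highest ID such that *all* smaller-or-equal writes have completed.
        if self._unfinished:
            return min(self._unfinished) - 1
        return self._last_allocated

gen = ToyStreamIdGen()
a, b = gen.allocate(), gen.allocate()   # ids 1 and 2
gen.mark_finished(b)                    # 2 finishes before 1
print(gen.get_current_token())          # 0: id 1 is still outstanding
gen.mark_finished(a)
print(gen.get_current_token())          # 2: everything up to 2 has completed
```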
- """ with self._lock: next_ids = range( self._current + self._step, @@ -208,12 +249,6 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return _AsyncCtxManagerWrapper(manager()) def get_current_token(self) -> int: - """Returns the maximum stream id such that all stream ids less than or - equal to it have been successfully persisted. - - Returns: - The maximum stream id. - """ with self._lock: if self._unfinished_ids: return next(iter(self._unfinished_ids)) - self._step @@ -221,16 +256,11 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return self._current def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer. - - For streams with single writers this is equivalent to - `get_current_token`. - """ return self.get_current_token() class MultiWriterIdGenerator(AbstractStreamIdGenerator): - """An ID generator that tracks a stream that can have multiple writers. + """Generates and tracks stream IDs for a stream with multiple writers. Uses a Postgres sequence to coordinate ID assignment, but positions of other writers will only get updated when `advance` is called (by replication). @@ -475,12 +505,6 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): return stream_ids def get_next(self) -> AsyncContextManager[int]: - """ - Usage: - async with stream_id_gen.get_next() as stream_id: - # ... persist event ... - """ - # If we have a list of instances that are allowed to write to this # stream, make sure we're in it. if self._writers and self._instance_name not in self._writers: @@ -492,12 +516,6 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): return cast(AsyncContextManager[int], _MultiWriterCtxManager(self)) def get_next_mult(self, n: int) -> AsyncContextManager[List[int]]: - """ - Usage: - async with stream_id_gen.get_next_mult(5) as stream_ids: - # ... persist events ... - """ - # If we have a list of instances that are allowed to write to this # stream, make sure we're in it. if self._writers and self._instance_name not in self._writers: @@ -597,15 +615,9 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): self._add_persisted_position(next_id) def get_current_token(self) -> int: - """Returns the maximum stream id such that all stream ids less than or - equal to it have been successfully persisted. - """ - return self.get_persisted_upto_position() def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer.""" - # If we don't have an entry for the given instance name, we assume it's a # new writer. # @@ -631,10 +643,6 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): } def advance(self, instance_name: str, new_id: int) -> None: - """Advance the position of the named writer to the given ID, if greater - than existing entry. 
- """ - new_id *= self._return_factor with self._lock: diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 561b962e14..20ce294209 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -27,6 +27,7 @@ from typing import ( Generic, Hashable, Iterable, + Iterator, Optional, Set, TypeVar, @@ -40,7 +41,6 @@ from typing_extensions import ContextManager from twisted.internet import defer from twisted.internet.defer import CancelledError from twisted.internet.interfaces import IReactorTime -from twisted.python import failure from twisted.python.failure import Failure from synapse.logging.context import ( @@ -78,7 +78,7 @@ class ObservableDeferred(Generic[_T]): object.__setattr__(self, "_result", None) object.__setattr__(self, "_observers", []) - def callback(r): + def callback(r: _T) -> _T: object.__setattr__(self, "_result", (True, r)) # once we have set _result, no more entries will be added to _observers, @@ -98,7 +98,7 @@ class ObservableDeferred(Generic[_T]): ) return r - def errback(f): + def errback(f: Failure) -> Optional[Failure]: object.__setattr__(self, "_result", (False, f)) # once we have set _result, no more entries will be added to _observers, @@ -109,7 +109,7 @@ class ObservableDeferred(Generic[_T]): for observer in observers: # This is a little bit of magic to correctly propagate stack # traces when we `await` on one of the observer deferreds. - f.value.__failure__ = f + f.value.__failure__ = f # type: ignore[union-attr] try: observer.errback(f) except Exception as e: @@ -314,7 +314,7 @@ class Linearizer: # will release the lock. @contextmanager - def _ctx_manager(_): + def _ctx_manager(_: None) -> Iterator[None]: try: yield finally: @@ -355,7 +355,7 @@ class Linearizer: new_defer = make_deferred_yieldable(defer.Deferred()) entry.deferreds[new_defer] = 1 - def cb(_r): + def cb(_r: None) -> "defer.Deferred[None]": logger.debug("Acquired linearizer lock %r for key %r", self.name, key) entry.count += 1 @@ -371,7 +371,7 @@ class Linearizer: # code must be synchronous, so this is the only sensible place.) return self._clock.sleep(0) - def eb(e): + def eb(e: Failure) -> Failure: logger.info("defer %r got err %r", new_defer, e) if isinstance(e, CancelledError): logger.debug( @@ -435,7 +435,7 @@ class ReadWriteLock: await make_deferred_yieldable(curr_writer) @contextmanager - def _ctx_manager(): + def _ctx_manager() -> Iterator[None]: try: yield finally: @@ -464,7 +464,7 @@ class ReadWriteLock: await make_deferred_yieldable(defer.gatherResults(to_wait_on)) @contextmanager - def _ctx_manager(): + def _ctx_manager() -> Iterator[None]: try: yield finally: @@ -524,7 +524,7 @@ def timeout_deferred( delayed_call = reactor.callLater(timeout, time_it_out) - def convert_cancelled(value: failure.Failure): + def convert_cancelled(value: Failure) -> Failure: # if the original deferred was cancelled, and our timeout has fired, then # the reason it was cancelled was due to our timeout. Turn the CancelledError # into a TimeoutError. 
@@ -534,7 +534,7 @@ def timeout_deferred( deferred.addErrback(convert_cancelled) - def cancel_timeout(result): + def cancel_timeout(result: _T) -> _T: # stop the pending call to cancel the deferred if it's been fired if delayed_call.active(): delayed_call.cancel() @@ -542,11 +542,11 @@ def timeout_deferred( deferred.addBoth(cancel_timeout) - def success_cb(val): + def success_cb(val: _T) -> None: if not new_d.called: new_d.callback(val) - def failure_cb(val): + def failure_cb(val: Failure) -> None: if not new_d.called: new_d.errback(val) @@ -557,13 +557,13 @@ def timeout_deferred( # This class can't be generic because it uses slots with attrs. # See: https://github.com/python-attrs/attrs/issues/313 -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class DoneAwaitable: # should be: Generic[R] """Simple awaitable that returns the provided value.""" - value = attr.ib(type=Any) # should be: R + value: Any # should be: R - def __await__(self): + def __await__(self) -> Any: return self def __iter__(self) -> "DoneAwaitable": diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index df4d61e4b6..15debd6c46 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -17,7 +17,7 @@ import logging import typing from enum import Enum, auto from sys import intern -from typing import Callable, Dict, Optional, Sized +from typing import Any, Callable, Dict, List, Optional, Sized import attr from prometheus_client.core import Gauge @@ -58,20 +58,20 @@ class EvictionReason(Enum): time = auto() -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class CacheMetric: - _cache = attr.ib() - _cache_type = attr.ib(type=str) - _cache_name = attr.ib(type=str) - _collect_callback = attr.ib(type=Optional[Callable]) + _cache: Sized + _cache_type: str + _cache_name: str + _collect_callback: Optional[Callable] - hits = attr.ib(default=0) - misses = attr.ib(default=0) + hits: int = 0 + misses: int = 0 eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib( factory=collections.Counter ) - memory_usage = attr.ib(default=None) + memory_usage: Optional[int] = None def inc_hits(self) -> None: self.hits += 1 @@ -89,13 +89,14 @@ class CacheMetric: self.memory_usage += memory def dec_memory_usage(self, memory: int) -> None: + assert self.memory_usage is not None self.memory_usage -= memory def clear_memory_usage(self) -> None: if self.memory_usage is not None: self.memory_usage = 0 - def describe(self): + def describe(self) -> List[str]: return [] def collect(self) -> None: @@ -118,8 +119,9 @@ class CacheMetric: self.eviction_size_by_reason[reason] ) cache_total.labels(self._cache_name).set(self.hits + self.misses) - if getattr(self._cache, "max_size", None): - cache_max_size.labels(self._cache_name).set(self._cache.max_size) + max_size = getattr(self._cache, "max_size", None) + if max_size: + cache_max_size.labels(self._cache_name).set(max_size) if TRACK_MEMORY_USAGE: # self.memory_usage can be None if nothing has been inserted @@ -193,7 +195,7 @@ KNOWN_KEYS = { } -def intern_string(string): +def intern_string(string: Optional[str]) -> Optional[str]: """Takes a (potentially) unicode string and interns it if it's ascii""" if string is None: return None @@ -204,7 +206,7 @@ def intern_string(string): return string -def intern_dict(dictionary): +def intern_dict(dictionary: Dict[str, Any]) -> Dict[str, Any]: """Takes a dictionary and interns well known keys and their values""" return { KNOWN_KEYS.get(key, key): 
_intern_known_values(key, value) @@ -212,7 +214,7 @@ def intern_dict(dictionary): } -def _intern_known_values(key, value): +def _intern_known_values(key: str, value: Any) -> Any: intern_keys = ("event_id", "room_id", "sender", "user_id", "type", "state_key") if key in intern_keys: diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index da502aec11..3c4cc093af 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -289,7 +289,7 @@ class DeferredCache(Generic[KT, VT]): callbacks = [callback] if callback else [] self.cache.set(key, value, callbacks=callbacks) - def invalidate(self, key) -> None: + def invalidate(self, key: KT) -> None: """Delete a key, or tree of entries If the cache is backed by a regular dict, then "key" must be of diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index b9dcca17f1..375cd443f1 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -19,12 +19,15 @@ import logging from typing import ( Any, Callable, + Dict, Generic, + Hashable, Iterable, Mapping, Optional, Sequence, Tuple, + Type, TypeVar, Union, cast, @@ -32,6 +35,7 @@ from typing import ( from weakref import WeakValueDictionary from twisted.internet import defer +from twisted.python.failure import Failure from synapse.logging.context import make_deferred_yieldable, preserve_fn from synapse.util import unwrapFirstError @@ -60,7 +64,12 @@ class _CachedFunction(Generic[F]): class _CacheDescriptorBase: - def __init__(self, orig: Callable[..., Any], num_args, cache_context=False): + def __init__( + self, + orig: Callable[..., Any], + num_args: Optional[int], + cache_context: bool = False, + ): self.orig = orig arg_spec = inspect.getfullargspec(orig) @@ -172,14 +181,14 @@ class LruCacheDescriptor(_CacheDescriptorBase): def __init__( self, - orig, + orig: Callable[..., Any], max_entries: int = 1000, cache_context: bool = False, ): super().__init__(orig, num_args=None, cache_context=cache_context) self.max_entries = max_entries - def __get__(self, obj, owner): + def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: LruCache[CacheKey, Any] = LruCache( cache_name=self.orig.__name__, max_size=self.max_entries, @@ -189,7 +198,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): sentinel = LruCacheDescriptor._Sentinel.sentinel @functools.wraps(self.orig) - def _wrapped(*args, **kwargs): + def _wrapped(*args: Any, **kwargs: Any) -> Any: invalidate_callback = kwargs.pop("on_invalidate", None) callbacks = (invalidate_callback,) if invalidate_callback else () @@ -245,19 +254,19 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): return r1 + r2 Args: - num_args (int): number of positional arguments (excluding ``self`` and + num_args: number of positional arguments (excluding ``self`` and ``cache_context``) to use as cache keys. Defaults to all named args of the function. 
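The `num_args` parameter documented above keys the cache on a prefix of the positional arguments rather than on all of them. A minimal decorator sketch of that keying behaviour, assuming a plain async function rather than Synapse's descriptor machinery:

```python
import asyncio
import functools
from typing import Any, Awaitable, Callable, Dict, Tuple


def cached_on_first_args(
    num_args: int,
) -> Callable[[Callable[..., Awaitable[Any]]], Callable[..., Awaitable[Any]]]:
    def decorator(func: Callable[..., Awaitable[Any]]) -> Callable[..., Awaitable[Any]]:
        cache: Dict[Tuple[Any, ...], Any] = {}

        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            key = args[:num_args]  # later positional args and kwargs do not affect the key
            if key not in cache:
                cache[key] = await func(*args, **kwargs)
            return cache[key]

        return wrapper

    return decorator


@cached_on_first_args(num_args=1)
async def get_profile(user_id: str, allow_stale: bool = False) -> Dict[str, Any]:
    # Stand-in for an expensive lookup; results are cached purely by user_id.
    return {"user_id": user_id}


async def main() -> None:
    first = await get_profile("@alice:example.org")
    second = await get_profile("@alice:example.org", allow_stale=True)
    assert first is second  # same cache entry despite the differing kwarg


asyncio.run(main())
```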
""" def __init__( self, - orig, - max_entries=1000, - num_args=None, - tree=False, - cache_context=False, - iterable=False, + orig: Callable[..., Any], + max_entries: int = 1000, + num_args: Optional[int] = None, + tree: bool = False, + cache_context: bool = False, + iterable: bool = False, prune_unread_entries: bool = True, ): super().__init__(orig, num_args=num_args, cache_context=cache_context) @@ -272,7 +281,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): self.iterable = iterable self.prune_unread_entries = prune_unread_entries - def __get__(self, obj, owner): + def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: DeferredCache[CacheKey, Any] = DeferredCache( name=self.orig.__name__, max_entries=self.max_entries, @@ -284,7 +293,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): get_cache_key = self.cache_key_builder @functools.wraps(self.orig) - def _wrapped(*args, **kwargs): + def _wrapped(*args: Any, **kwargs: Any) -> Any: # If we're passed a cache_context then we'll want to call its invalidate() # whenever we are invalidated invalidate_callback = kwargs.pop("on_invalidate", None) @@ -335,13 +344,19 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): of results. """ - def __init__(self, orig, cached_method_name, list_name, num_args=None): + def __init__( + self, + orig: Callable[..., Any], + cached_method_name: str, + list_name: str, + num_args: Optional[int] = None, + ): """ Args: - orig (function) - cached_method_name (str): The name of the cached method. - list_name (str): Name of the argument which is the bulk lookup list - num_args (int): number of positional arguments (excluding ``self``, + orig + cached_method_name: The name of the cached method. + list_name: Name of the argument which is the bulk lookup list + num_args: number of positional arguments (excluding ``self``, but including list_name) to use as cache keys. Defaults to all named args of the function. """ @@ -360,13 +375,15 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): % (self.list_name, cached_method_name) ) - def __get__(self, obj, objtype=None): + def __get__( + self, obj: Optional[Any], objtype: Optional[Type] = None + ) -> Callable[..., Any]: cached_method = getattr(obj, self.cached_method_name) cache: DeferredCache[CacheKey, Any] = cached_method.cache num_args = cached_method.num_args @functools.wraps(self.orig) - def wrapped(*args, **kwargs): + def wrapped(*args: Any, **kwargs: Any) -> Any: # If we're passed a cache_context then we'll want to call its # invalidate() whenever we are invalidated invalidate_callback = kwargs.pop("on_invalidate", None) @@ -377,7 +394,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): results = {} - def update_results_dict(res, arg): + def update_results_dict(res: Any, arg: Hashable) -> None: results[arg] = res # list of deferreds to wait for @@ -389,13 +406,13 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): # otherwise a tuple is used. if num_args == 1: - def arg_to_cache_key(arg): + def arg_to_cache_key(arg: Hashable) -> Hashable: return arg else: keylist = list(keyargs) - def arg_to_cache_key(arg): + def arg_to_cache_key(arg: Hashable) -> Hashable: keylist[self.list_pos] = arg return tuple(keylist) @@ -421,7 +438,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): key = arg_to_cache_key(arg) cache.set(key, deferred, callback=invalidate_callback) - def complete_all(res): + def complete_all(res: Dict[Hashable, Any]) -> None: # the wrapped function has completed. 
It returns a # a dict. We can now resolve the observable deferreds in # the cache and update our own result map. @@ -430,7 +447,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): deferreds_map[e].callback(val) results[e] = val - def errback(f): + def errback(f: Failure) -> Failure: # the wrapped function has failed. Invalidate any cache # entries we're supposed to be populating, and fail # their deferreds. diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index c3f72aa06d..67ee4c693b 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -19,6 +19,8 @@ from typing import Any, Generic, Optional, TypeVar, Union, overload import attr from typing_extensions import Literal +from twisted.internet import defer + from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import Clock @@ -81,7 +83,7 @@ class ExpiringCache(Generic[KT, VT]): # Don't bother starting the loop if things never expire return - def f(): + def f() -> "defer.Deferred[None]": return run_as_background_process( "prune_cache_%s" % self._cache_name, self._prune_cache ) @@ -157,7 +159,7 @@ class ExpiringCache(Generic[KT, VT]): self[key] = value return value - def _prune_cache(self) -> None: + async def _prune_cache(self) -> None: if not self._expiry_ms: # zero expiry time means don't expire. This should never get called # since we have this check in start too. @@ -210,7 +212,7 @@ class ExpiringCache(Generic[KT, VT]): return False -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class _CacheEntry: - time = attr.ib(type=int) - value = attr.ib() + time: int + value: Any diff --git a/synapse/util/distributor.py b/synapse/util/distributor.py index 31097d6439..91837655f8 100644 --- a/synapse/util/distributor.py +++ b/synapse/util/distributor.py @@ -18,12 +18,13 @@ from twisted.internet import defer from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.types import UserID from synapse.util.async_helpers import maybe_awaitable logger = logging.getLogger(__name__) -def user_left_room(distributor, user, room_id): +def user_left_room(distributor: "Distributor", user: UserID, room_id: str) -> None: distributor.fire("user_left_room", user=user, room_id=room_id) @@ -63,7 +64,7 @@ class Distributor: self.pre_registration[name] = [] self.pre_registration[name].append(observer) - def fire(self, name: str, *args, **kwargs) -> None: + def fire(self, name: str, *args: Any, **kwargs: Any) -> None: """Dispatches the given signal to the registered observers. Runs the observers as a background process. Does not return a deferred. @@ -95,7 +96,7 @@ class Signal: Each observer callable may return a Deferred.""" self.observers.append(observer) - def fire(self, *args, **kwargs) -> "defer.Deferred[List[Any]]": + def fire(self, *args: Any, **kwargs: Any) -> "defer.Deferred[List[Any]]": """Invokes every callable in the observer list, passing in the args and kwargs. Exceptions thrown by observers are logged but ignored. It is not an error to fire a signal with no observers. 
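The `Signal.fire` docstring above promises that observer exceptions are logged but ignored and that firing with no observers is harmless. A compact asyncio stand-in for that contract (names are illustrative, not Synapse's API):

```python
import asyncio
import logging
from typing import Any, Callable, List

logger = logging.getLogger(__name__)


class ToySignal:
    def __init__(self, name: str) -> None:
        self.name = name
        self.observers: List[Callable[..., Any]] = []

    def observe(self, observer: Callable[..., Any]) -> None:
        self.observers.append(observer)

    async def fire(self, *args: Any, **kwargs: Any) -> List[Any]:
        async def run_one(observer: Callable[..., Any]) -> Any:
            try:
                return await observer(*args, **kwargs)
            except Exception:
                logger.exception("%r: observer %r failed", self, observer)
                return None

        # Firing with no observers is fine: gather() of nothing returns [].
        return list(await asyncio.gather(*(run_one(o) for o in self.observers)))

    def __repr__(self) -> str:
        return "<ToySignal name=%r>" % (self.name,)


async def main() -> None:
    sig = ToySignal("user_left_room")

    async def on_user_left(user: str, room_id: str) -> str:
        return f"{user} left {room_id}"

    async def broken(user: str, room_id: str) -> None:
        raise RuntimeError("boom")  # logged, but must not break other observers

    sig.observe(on_user_left)
    sig.observe(broken)
    print(await sig.fire(user="@alice:example.org", room_id="!room:example.org"))


asyncio.run(main())
```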
@@ -103,7 +104,7 @@ class Signal: Returns a Deferred that will complete when all the observers have completed.""" - async def do(observer): + async def do(observer: Callable[..., Any]) -> Any: try: return await maybe_awaitable(observer(*args, **kwargs)) except Exception as e: @@ -120,5 +121,5 @@ class Signal: defer.gatherResults(deferreds, consumeErrors=True) ) - def __repr__(self): + def __repr__(self) -> str: return "<Signal name=%r>" % (self.name,) diff --git a/synapse/util/gai_resolver.py b/synapse/util/gai_resolver.py index a447ce4e55..214eb17fbc 100644 --- a/synapse/util/gai_resolver.py +++ b/synapse/util/gai_resolver.py @@ -3,23 +3,52 @@ # We copy it here as we need to instantiate `GAIResolver` manually, but it is a # private class. - from socket import ( AF_INET, AF_INET6, AF_UNSPEC, SOCK_DGRAM, SOCK_STREAM, + AddressFamily, + SocketKind, gaierror, getaddrinfo, ) +from typing import ( + TYPE_CHECKING, + Callable, + List, + NoReturn, + Optional, + Sequence, + Tuple, + Type, + Union, +) from zope.interface import implementer from twisted.internet.address import IPv4Address, IPv6Address -from twisted.internet.interfaces import IHostnameResolver, IHostResolution +from twisted.internet.interfaces import ( + IAddress, + IHostnameResolver, + IHostResolution, + IReactorThreads, + IResolutionReceiver, +) from twisted.internet.threads import deferToThreadPool +if TYPE_CHECKING: + # The types below are copied from + # https://github.com/twisted/twisted/blob/release-21.2.0-10091/src/twisted/internet/interfaces.py + # so that the type hints can match the interfaces. + from twisted.python.runtime import platform + + if platform.supportsThreads(): + from twisted.python.threadpool import ThreadPool + else: + ThreadPool = object # type: ignore[misc, assignment] + @implementer(IHostResolution) class HostResolution: @@ -27,13 +56,13 @@ class HostResolution: The in-progress resolution of a given hostname. """ - def __init__(self, name): + def __init__(self, name: str): """ Create a L{HostResolution} with the given name. """ self.name = name - def cancel(self): + def cancel(self) -> NoReturn: # IHostResolution.cancel raise NotImplementedError() @@ -62,6 +91,17 @@ _socktypeToType = { } +_GETADDRINFO_RESULT = List[ + Tuple[ + AddressFamily, + SocketKind, + int, + str, + Union[Tuple[str, int], Tuple[str, int, int, int]], + ] +] + + @implementer(IHostnameResolver) class GAIResolver: """ @@ -69,7 +109,12 @@ class GAIResolver: L{getaddrinfo} in a thread. """ - def __init__(self, reactor, getThreadPool=None, getaddrinfo=getaddrinfo): + def __init__( + self, + reactor: IReactorThreads, + getThreadPool: Optional[Callable[[], "ThreadPool"]] = None, + getaddrinfo: Callable[[str, int, int, int], _GETADDRINFO_RESULT] = getaddrinfo, + ): """ Create a L{GAIResolver}. 
@param reactor: the reactor to schedule result-delivery on @@ -89,14 +134,16 @@ class GAIResolver: ) self._getaddrinfo = getaddrinfo - def resolveHostName( + # The types on IHostnameResolver is incorrect in Twisted, see + # https://twistedmatrix.com/trac/ticket/10276 + def resolveHostName( # type: ignore[override] self, - resolutionReceiver, - hostName, - portNumber=0, - addressTypes=None, - transportSemantics="TCP", - ): + resolutionReceiver: IResolutionReceiver, + hostName: str, + portNumber: int = 0, + addressTypes: Optional[Sequence[Type[IAddress]]] = None, + transportSemantics: str = "TCP", + ) -> IHostResolution: """ See L{IHostnameResolver.resolveHostName} @param resolutionReceiver: see interface @@ -112,7 +159,7 @@ class GAIResolver: ] socketType = _transportToSocket[transportSemantics] - def get(): + def get() -> _GETADDRINFO_RESULT: try: return self._getaddrinfo( hostName, portNumber, addressFamily, socketType @@ -125,7 +172,7 @@ class GAIResolver: resolutionReceiver.resolutionBegan(resolution) @d.addCallback - def deliverResults(result): + def deliverResults(result: _GETADDRINFO_RESULT) -> None: for family, socktype, _proto, _cannoname, sockaddr in result: addrType = _afToType[family] resolutionReceiver.addressResolved( diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 1e784b3f1f..98ee49af6e 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -56,14 +56,22 @@ block_db_sched_duration = Counter( "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"] ) + +# This is dynamically created in InFlightGauge.__init__. +class _InFlightMetric(Protocol): + real_time_max: float + real_time_sum: float + + # Tracks the number of blocks currently active -in_flight = InFlightGauge( +in_flight: InFlightGauge[_InFlightMetric] = InFlightGauge( "synapse_util_metrics_block_in_flight", "", labels=["block_name"], sub_metrics=["real_time_max", "real_time_sum"], ) + T = TypeVar("T", bound=Callable[..., Any]) @@ -180,7 +188,7 @@ class Measure: """ return self._logging_context.get_resource_usage() - def _update_in_flight(self, metrics) -> None: + def _update_in_flight(self, metrics: _InFlightMetric) -> None: """Gets called when processing in flight metrics""" assert self.start is not None duration = self.clock.time() - self.start diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index f029432191..ea1032b4fc 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -19,6 +19,8 @@ import string from collections.abc import Iterable from typing import Optional, Tuple +from netaddr import valid_ipv6 + from synapse.api.errors import Codes, SynapseError _string_with_symbols = string.digits + string.ascii_letters + ".,;:^&*-_+=#~@" @@ -97,7 +99,10 @@ def parse_server_name(server_name: str) -> Tuple[str, Optional[int]]: raise ValueError("Invalid server name '%s'" % server_name) -VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z") +# An approximation of the domain name syntax in RFC 1035, section 2.3.1. +# NB: "\Z" is not equivalent to "$". +# The latter will match the position before a "\n" at the end of a string. +VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*\\Z") def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int]]: @@ -122,13 +127,15 @@ def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int] if host[0] == "[": if host[-1] != "]": raise ValueError("Mismatched [...] 
in server name '%s'" % (server_name,)) - return host, port - # otherwise it should only be alphanumerics. - if not VALID_HOST_REGEX.match(host): - raise ValueError( - "Server name '%s' contains invalid characters" % (server_name,) - ) + # valid_ipv6 raises when given an empty string + ipv6_address = host[1:-1] + if not ipv6_address or not valid_ipv6(ipv6_address): + raise ValueError( + "Server name '%s' is not a valid IPv6 address" % (server_name,) + ) + elif not VALID_HOST_REGEX.match(host): + raise ValueError("Server name '%s' has an invalid format" % (server_name,)) return host, port diff --git a/tests/config/test_load.py b/tests/config/test_load.py index 765258c47a..69a4e9413b 100644 --- a/tests/config/test_load.py +++ b/tests/config/test_load.py @@ -46,15 +46,16 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): "was: %r" % (config.key.macaroon_secret_key,) ) - config = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + config2 = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + assert config2 is not None self.assertTrue( - hasattr(config.key, "macaroon_secret_key"), + hasattr(config2.key, "macaroon_secret_key"), "Want config to have attr macaroon_secret_key", ) - if len(config.key.macaroon_secret_key) < 5: + if len(config2.key.macaroon_secret_key) < 5: self.fail( "Want macaroon secret key to be string of at least length 5," - "was: %r" % (config.key.macaroon_secret_key,) + "was: %r" % (config2.key.macaroon_secret_key,) ) def test_load_succeeds_if_macaroon_secret_key_missing(self): @@ -62,6 +63,9 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): config1 = HomeServerConfig.load_config("", ["-c", self.config_file]) config2 = HomeServerConfig.load_config("", ["-c", self.config_file]) config3 = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + assert config1 is not None + assert config2 is not None + assert config3 is not None self.assertEqual( config1.key.macaroon_secret_key, config2.key.macaroon_secret_key ) @@ -78,14 +82,16 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): config = HomeServerConfig.load_config("", ["-c", self.config_file]) self.assertFalse(config.registration.enable_registration) - config = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) - self.assertFalse(config.registration.enable_registration) + config2 = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + assert config2 is not None + self.assertFalse(config2.registration.enable_registration) # Check that either config value is clobbered by the command line. - config = HomeServerConfig.load_or_generate_config( + config3 = HomeServerConfig.load_or_generate_config( "", ["-c", self.config_file, "--enable-registration"] ) - self.assertTrue(config.registration.enable_registration) + assert config3 is not None + self.assertTrue(config3.registration.enable_registration) def test_stats_enabled(self): self.generate_config_and_remove_lines_containing("enable_metrics") @@ -94,3 +100,12 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): # The default Metrics Flags are off by default. 
config = HomeServerConfig.load_config("", ["-c", self.config_file]) self.assertFalse(config.metrics.metrics_flags.known_servers) + + def test_depreciated_identity_server_flag_throws_error(self): + self.generate_config() + # Needed to ensure that actual key/value pair added below don't end up on a line with a comment + self.add_lines_to_config([" "]) + # Check that presence of "trust_identity_server_for_password" throws config error + self.add_lines_to_config(["trust_identity_server_for_password_resets: true"]) + with self.assertRaises(ConfigError): + HomeServerConfig.load_config("", ["-c", self.config_file]) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index cbecc1c20f..17a9fb63a1 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -1,4 +1,4 @@ -# Copyright 2017 New Vector Ltd +# Copyright 2017-2021 The Matrix.org Foundation C.I.C # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import signedjson.sign from nacl.signing import SigningKey from signedjson.key import encode_verify_key_base64, get_verify_key +from twisted.internet import defer from twisted.internet.defer import Deferred, ensureDeferred from synapse.api.errors import SynapseError @@ -40,7 +41,7 @@ from synapse.storage.keys import FetchKeyResult from tests import unittest from tests.test_utils import make_awaitable -from tests.unittest import logcontext_clean +from tests.unittest import logcontext_clean, override_config class MockPerspectiveServer: @@ -197,7 +198,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): # self.assertFalse(d.called) self.get_success(d) - def test_verify_for_server_locally(self): + def test_verify_for_local_server(self): """Ensure that locally signed JSON can be verified without fetching keys over federation """ @@ -209,6 +210,56 @@ class KeyringTestCase(unittest.HomeserverTestCase): d = kr.verify_json_for_server(self.hs.hostname, json1, 0) self.get_success(d) + OLD_KEY = signedjson.key.generate_signing_key("old") + + @override_config( + { + "old_signing_keys": { + f"{OLD_KEY.alg}:{OLD_KEY.version}": { + "key": encode_verify_key_base64(OLD_KEY.verify_key), + "expired_ts": 1000, + } + } + } + ) + def test_verify_for_local_server_old_key(self): + """Can also use keys in old_signing_keys for verification""" + json1 = {} + signedjson.sign.sign_json(json1, self.hs.hostname, self.OLD_KEY) + + kr = keyring.Keyring(self.hs) + d = kr.verify_json_for_server(self.hs.hostname, json1, 0) + self.get_success(d) + + def test_verify_for_local_server_unknown_key(self): + """Local keys that we no longer have should be fetched via the fetcher""" + + # the key we'll sign things with (nb, not known to the Keyring) + key2 = signedjson.key.generate_signing_key("2") + + # set up a mock fetcher which will return the key + async def get_keys( + server_name: str, key_ids: List[str], minimum_valid_until_ts: int + ) -> Dict[str, FetchKeyResult]: + self.assertEqual(server_name, self.hs.hostname) + self.assertEqual(key_ids, [get_key_id(key2)]) + + return {get_key_id(key2): FetchKeyResult(get_verify_key(key2), 1200)} + + mock_fetcher = Mock() + mock_fetcher.get_keys = Mock(side_effect=get_keys) + kr = keyring.Keyring( + self.hs, key_fetchers=(StoreKeyFetcher(self.hs), mock_fetcher) + ) + + # sign the json + json1 = {} + signedjson.sign.sign_json(json1, self.hs.hostname, key2) + + # ... and check we can verify it. 
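The keyring tests above sign an empty JSON object with a locally known (or `old_signing_keys`) key and expect verification to succeed. The underlying signedjson round trip looks roughly like this; the hostname is a placeholder:

```python
import signedjson.key
import signedjson.sign

signing_key = signedjson.key.generate_signing_key("ver1")
verify_key = signedjson.key.get_verify_key(signing_key)

event: dict = {}
signedjson.sign.sign_json(event, "example.org", signing_key)

# Raises signedjson.sign.SignatureVerifyException if the signature is bad.
signedjson.sign.verify_signed_json(event, "example.org", verify_key)
print("signature verified")
```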
+ d = kr.verify_json_for_server(self.hs.hostname, json1, 0) + self.get_success(d) + def test_verify_json_for_server_with_null_valid_until_ms(self): """Tests that we correctly handle key requests for keys we've stored with a null `ts_valid_until_ms` @@ -527,6 +578,76 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): bytes(res["key_json"]), canonicaljson.encode_canonical_json(response) ) + def test_get_multiple_keys_from_perspectives(self): + """Check that we can correctly request multiple keys for the same server""" + + fetcher = PerspectivesKeyFetcher(self.hs) + + SERVER_NAME = "server2" + + testkey1 = signedjson.key.generate_signing_key("ver1") + testverifykey1 = signedjson.key.get_verify_key(testkey1) + testverifykey1_id = "ed25519:ver1" + + testkey2 = signedjson.key.generate_signing_key("ver2") + testverifykey2 = signedjson.key.get_verify_key(testkey2) + testverifykey2_id = "ed25519:ver2" + + VALID_UNTIL_TS = 200 * 1000 + + response1 = self.build_perspectives_response( + SERVER_NAME, + testkey1, + VALID_UNTIL_TS, + ) + response2 = self.build_perspectives_response( + SERVER_NAME, + testkey2, + VALID_UNTIL_TS, + ) + + async def post_json(destination, path, data, **kwargs): + self.assertEqual(destination, self.mock_perspective_server.server_name) + self.assertEqual(path, "/_matrix/key/v2/query") + + # check that the request is for the expected keys + q = data["server_keys"] + + self.assertEqual( + list(q[SERVER_NAME].keys()), [testverifykey1_id, testverifykey2_id] + ) + return {"server_keys": [response1, response2]} + + self.http_client.post_json.side_effect = post_json + + # fire off two separate requests; they should get merged together into a + # single HTTP hit. + request1_d = defer.ensureDeferred( + fetcher.get_keys(SERVER_NAME, [testverifykey1_id], 0) + ) + request2_d = defer.ensureDeferred( + fetcher.get_keys(SERVER_NAME, [testverifykey2_id], 0) + ) + + keys1 = self.get_success(request1_d) + self.assertIn(testverifykey1_id, keys1) + k = keys1[testverifykey1_id] + self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS) + self.assertEqual(k.verify_key, testverifykey1) + self.assertEqual(k.verify_key.alg, "ed25519") + self.assertEqual(k.verify_key.version, "ver1") + + keys2 = self.get_success(request2_d) + self.assertIn(testverifykey2_id, keys2) + k = keys2[testverifykey2_id] + self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS) + self.assertEqual(k.verify_key, testverifykey2) + self.assertEqual(k.verify_key.alg, "ed25519") + self.assertEqual(k.verify_key.version, "ver2") + + # finally, ensure that only one request was sent + self.assertEqual(self.http_client.post_json.call_count, 1) + def test_get_perspectives_own_key(self): """Check that we can get the perspectives server's own keys diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py new file mode 100644 index 0000000000..0b19159961 --- /dev/null +++ b/tests/federation/transport/test_client.py @@ -0,0 +1,50 @@ +import json + +from synapse.api.room_versions import RoomVersions +from synapse.federation.transport.client import SendJoinParser + +from tests.unittest import TestCase + + +class SendJoinParserTestCase(TestCase): + def test_two_writes(self) -> None: + """Test that the parser can sensibly deserialise an input given in two slices.""" + parser = SendJoinParser(RoomVersions.V1, True) + parent_event = { + "content": { + "see_room_version_spec": "The event format changes depending on the room version." 
+ }, + "event_id": "$authparent", + "room_id": "!somewhere:example.org", + "type": "m.room.minimal_pdu", + } + state = { + "content": { + "see_room_version_spec": "The event format changes depending on the room version." + }, + "event_id": "$DoNotThinkAboutTheEvent", + "room_id": "!somewhere:example.org", + "type": "m.room.minimal_pdu", + } + response = [ + 200, + { + "auth_chain": [parent_event], + "origin": "matrix.org", + "state": [state], + }, + ] + serialised_response = json.dumps(response).encode() + + # Send data to the parser + parser.write(serialised_response[:100]) + parser.write(serialised_response[100:]) + + # Retrieve the parsed SendJoinResponse + parsed_response = parser.finish() + + # Sanity check the parsing gave us sensible data. + self.assertEqual(len(parsed_response.auth_events), 1, parsed_response) + self.assertEqual(len(parsed_response.state), 1, parsed_response) + self.assertEqual(parsed_response.event_dict, {}, parsed_response) + self.assertIsNone(parsed_response.event, parsed_response) diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index 12857053e7..72e176da75 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -116,7 +116,7 @@ class AuthTestCase(unittest.HomeserverTestCase): self.auth_blocking._limit_usage_by_mau = False # Ensure does not throw exception self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user1, device_id=None, valid_until_ms=None ) ) @@ -134,7 +134,7 @@ class AuthTestCase(unittest.HomeserverTestCase): ) self.get_failure( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user1, device_id=None, valid_until_ms=None ), ResourceLimitError, @@ -162,7 +162,7 @@ class AuthTestCase(unittest.HomeserverTestCase): # If not in monthly active cohort self.get_failure( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user1, device_id=None, valid_until_ms=None ), ResourceLimitError, @@ -179,7 +179,7 @@ class AuthTestCase(unittest.HomeserverTestCase): return_value=make_awaitable(self.clock.time_msec()) ) self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user1, device_id=None, valid_until_ms=None ) ) @@ -197,7 +197,7 @@ class AuthTestCase(unittest.HomeserverTestCase): ) # Ensure does not raise exception self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user1, device_id=None, valid_until_ms=None ) ) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index be008227df..0ea4e753e2 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2021 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
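Returning to the `SendJoinParser` test earlier in this hunk: the response is fed to the parser in two `write()` calls and the result is retrieved with `finish()`. A buffering stand-in with the same surface, shown only to make the shape of that API concrete (the real parser decodes incrementally rather than buffering):

```python
import json
from typing import Any


class BufferingJsonParser:
    """Accepts bytes in arbitrary chunks; decodes everything at finish()."""

    def __init__(self) -> None:
        self._buffer = b""

    def write(self, data: bytes) -> int:
        self._buffer += data
        return len(data)

    def finish(self) -> Any:
        return json.loads(self._buffer)


parser = BufferingJsonParser()
payload = json.dumps([200, {"origin": "matrix.org", "state": []}]).encode()
parser.write(payload[:10])
parser.write(payload[10:])
print(parser.finish())  # [200, {'origin': 'matrix.org', 'state': []}]
```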
- from unittest.mock import Mock import synapse.api.errors import synapse.rest.admin from synapse.api.constants import EventTypes -from synapse.config.room_directory import RoomDirectoryConfig from synapse.rest.client import directory, login, room from synapse.types import RoomAlias, create_requester @@ -394,22 +393,15 @@ class TestCreateAliasACL(unittest.HomeserverTestCase): servlets = [directory.register_servlets, room.register_servlets] - def prepare(self, reactor, clock, hs): - # We cheekily override the config to add custom alias creation rules - config = {} + def default_config(self): + config = super().default_config() + + # Add custom alias creation rules to the config. config["alias_creation_rules"] = [ {"user_id": "*", "alias": "#unofficial_*", "action": "allow"} ] - config["room_list_publication_rules"] = [] - rd_config = RoomDirectoryConfig() - rd_config.read_config(config) - - self.hs.config.roomdirectory.is_alias_creation_allowed = ( - rd_config.is_alias_creation_allowed - ) - - return hs + return config def test_denied(self): room_id = self.helper.create_room_as(self.user_id) @@ -417,7 +409,7 @@ class TestCreateAliasACL(unittest.HomeserverTestCase): channel = self.make_request( "PUT", b"directory/room/%23test%3Atest", - ('{"room_id":"%s"}' % (room_id,)).encode("ascii"), + {"room_id": room_id}, ) self.assertEquals(403, channel.code, channel.result) @@ -427,14 +419,35 @@ class TestCreateAliasACL(unittest.HomeserverTestCase): channel = self.make_request( "PUT", b"directory/room/%23unofficial_test%3Atest", - ('{"room_id":"%s"}' % (room_id,)).encode("ascii"), + {"room_id": room_id}, ) self.assertEquals(200, channel.code, channel.result) + def test_denied_during_creation(self): + """A room alias that is not allowed should be rejected during creation.""" + # Invalid room alias. + self.helper.create_room_as( + self.user_id, + expect_code=403, + extra_content={"room_alias_name": "foo"}, + ) -class TestCreatePublishedRoomACL(unittest.HomeserverTestCase): - data = {"room_alias_name": "unofficial_test"} + def test_allowed_during_creation(self): + """A valid room alias should be allowed during creation.""" + room_id = self.helper.create_room_as( + self.user_id, + extra_content={"room_alias_name": "unofficial_test"}, + ) + channel = self.make_request( + "GET", + b"directory/room/%23unofficial_test%3Atest", + ) + self.assertEquals(200, channel.code, channel.result) + self.assertEquals(channel.json_body["room_id"], room_id) + + +class TestCreatePublishedRoomACL(unittest.HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, @@ -443,27 +456,30 @@ class TestCreatePublishedRoomACL(unittest.HomeserverTestCase): ] hijack_auth = False - def prepare(self, reactor, clock, hs): - self.allowed_user_id = self.register_user("allowed", "pass") - self.allowed_access_token = self.login("allowed", "pass") + data = {"room_alias_name": "unofficial_test"} + allowed_localpart = "allowed" - self.denied_user_id = self.register_user("denied", "pass") - self.denied_access_token = self.login("denied", "pass") + def default_config(self): + config = super().default_config() - # This time we add custom room list publication rules - config = {} - config["alias_creation_rules"] = [] + # Add custom room list publication rules to the config. 
config["room_list_publication_rules"] = [ + { + "user_id": "@" + self.allowed_localpart + "*", + "alias": "#unofficial_*", + "action": "allow", + }, {"user_id": "*", "alias": "*", "action": "deny"}, - {"user_id": self.allowed_user_id, "alias": "*", "action": "allow"}, ] - rd_config = RoomDirectoryConfig() - rd_config.read_config(config) + return config - self.hs.config.roomdirectory.is_publishing_room_allowed = ( - rd_config.is_publishing_room_allowed - ) + def prepare(self, reactor, clock, hs): + self.allowed_user_id = self.register_user(self.allowed_localpart, "pass") + self.allowed_access_token = self.login(self.allowed_localpart, "pass") + + self.denied_user_id = self.register_user("denied", "pass") + self.denied_access_token = self.login("denied", "pass") return hs @@ -505,10 +521,23 @@ class TestCreatePublishedRoomACL(unittest.HomeserverTestCase): self.allowed_user_id, tok=self.allowed_access_token, extra_content=self.data, - is_public=False, + is_public=True, expect_code=200, ) + def test_denied_publication_with_invalid_alias(self): + """ + Try to create a room, register an alias for it, and publish it, + as a user WITH permission to publish rooms. + """ + self.helper.create_room_as( + self.allowed_user_id, + tok=self.allowed_access_token, + extra_content={"room_alias_name": "foo"}, + is_public=True, + expect_code=403, + ) + def test_can_create_as_private_room_after_rejection(self): """ After failing to publish a room with an alias as a user without publish permission, diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 0c3b86fda9..f0723892e4 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -162,6 +162,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): local_user = "@boris:" + self.hs.hostname device_id = "xyz" fallback_key = {"alg1:k1": "key1"} + fallback_key2 = {"alg1:k2": "key2"} otk = {"alg1:k2": "key2"} # we shouldn't have any unused fallback keys yet @@ -213,6 +214,35 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key}}}, ) + # re-uploading the same fallback key should still result in no unused fallback + # keys + self.get_success( + self.handler.upload_keys_for_user( + local_user, + device_id, + {"org.matrix.msc2732.fallback_keys": fallback_key}, + ) + ) + + res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(local_user, device_id) + ) + self.assertEqual(res, []) + + # uploading a new fallback key should result in an unused fallback key + self.get_success( + self.handler.upload_keys_for_user( + local_user, + device_id, + {"org.matrix.msc2732.fallback_keys": fallback_key2}, + ) + ) + + res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(local_user, device_id) + ) + self.assertEqual(res, ["alg1"]) + # if the user uploads a one-time key, the next claim should fetch the # one-time key, and then go back to the fallback self.get_success( @@ -238,7 +268,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): ) self.assertEqual( res, - {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key}}}, + {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key2}}}, ) def test_replace_master_key(self): diff --git a/tests/handlers/test_room_summary.py b/tests/handlers/test_room_summary.py index 86beb8ff08..e85d112ecc 100644 --- a/tests/handlers/test_room_summary.py +++ b/tests/handlers/test_room_summary.py @@ -14,6 +14,8 @@ from typing import Any, 
Iterable, List, Optional, Tuple from unittest import mock +from twisted.internet.defer import ensureDeferred + from synapse.api.constants import ( EventContentFields, EventTypes, @@ -30,7 +32,7 @@ from synapse.handlers.room_summary import _RoomEntry, child_events_comparison_ke from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, UserID, create_requester from tests import unittest @@ -247,7 +249,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): self._assert_rooms(result, expected) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -261,7 +263,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): expected = [(self.space, [self.room]), (self.room, ())] self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) # If the space is made invite-only, it should no longer be viewable. @@ -272,7 +276,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): tok=self.token, ) self.get_failure(self.handler.get_space_summary(user2, self.space), AuthError) - self.get_failure(self.handler.get_room_hierarchy(user2, self.space), AuthError) + self.get_failure( + self.handler.get_room_hierarchy(create_requester(user2), self.space), + AuthError, + ) # If the space is made world-readable it should return a result. self.helper.send_state( @@ -284,7 +291,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): result = self.get_success(self.handler.get_space_summary(user2, self.space)) self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) # Make it not world-readable again and confirm it results in an error. @@ -295,7 +304,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): tok=self.token, ) self.get_failure(self.handler.get_space_summary(user2, self.space), AuthError) - self.get_failure(self.handler.get_room_hierarchy(user2, self.space), AuthError) + self.get_failure( + self.handler.get_room_hierarchy(create_requester(user2), self.space), + AuthError, + ) # Join the space and results should be returned. self.helper.invite(self.space, targ=user2, tok=self.token) @@ -303,7 +315,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): result = self.get_success(self.handler.get_space_summary(user2, self.space)) self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) # Attempting to view an unknown room returns the same error. 
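The end-to-end-keys test above pins down the fallback-key rules: re-uploading the identical fallback key must not mark it unused again, while uploading a genuinely new key does. A compact sketch of that bookkeeping, assuming a single fallback key per device:

```python
from typing import Dict, List, Optional, Tuple


class FallbackKeyStore:
    """One fallback key per device, stored as (key_id, key, used)."""

    def __init__(self) -> None:
        self._fallback: Dict[str, Tuple[str, str, bool]] = {}

    def upload_fallback_key(self, device_id: str, key_id: str, key: str) -> None:
        existing = self._fallback.get(device_id)
        if existing is not None and existing[:2] == (key_id, key):
            return  # identical re-upload: keep the "used" flag as-is
        self._fallback[device_id] = (key_id, key, False)

    def unused_fallback_key_types(self, device_id: str) -> List[str]:
        entry = self._fallback.get(device_id)
        if entry is None or entry[2]:
            return []
        return [entry[0].split(":", 1)[0]]

    def claim(self, device_id: str) -> Optional[Tuple[str, str]]:
        entry = self._fallback.get(device_id)
        if entry is None:
            return None
        self._fallback[device_id] = (entry[0], entry[1], True)
        return entry[0], entry[1]


store = FallbackKeyStore()
store.upload_fallback_key("xyz", "alg1:k1", "key1")
assert store.unused_fallback_key_types("xyz") == ["alg1"]
store.claim("xyz")
store.upload_fallback_key("xyz", "alg1:k1", "key1")  # same key: stays used
assert store.unused_fallback_key_types("xyz") == []
store.upload_fallback_key("xyz", "alg1:k2", "key2")  # new key: unused again
assert store.unused_fallback_key_types("xyz") == ["alg1"]
```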
@@ -312,10 +326,67 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): AuthError, ) self.get_failure( - self.handler.get_room_hierarchy(user2, "#not-a-space:" + self.hs.hostname), + self.handler.get_room_hierarchy( + create_requester(user2), "#not-a-space:" + self.hs.hostname + ), AuthError, ) + def test_room_hierarchy_cache(self) -> None: + """In-flight room hierarchy requests are deduplicated.""" + # Run two `get_room_hierarchy` calls up until they block. + deferred1 = ensureDeferred( + self.handler.get_room_hierarchy(create_requester(self.user), self.space) + ) + deferred2 = ensureDeferred( + self.handler.get_room_hierarchy(create_requester(self.user), self.space) + ) + + # Complete the two calls. + result1 = self.get_success(deferred1) + result2 = self.get_success(deferred2) + + # Both `get_room_hierarchy` calls should return the same result. + expected = [(self.space, [self.room]), (self.room, ())] + self._assert_hierarchy(result1, expected) + self._assert_hierarchy(result2, expected) + self.assertIs(result1, result2) + + # A subsequent `get_room_hierarchy` call should not reuse the result. + result3 = self.get_success( + self.handler.get_room_hierarchy(create_requester(self.user), self.space) + ) + self._assert_hierarchy(result3, expected) + self.assertIsNot(result1, result3) + + def test_room_hierarchy_cache_sharing(self) -> None: + """Room hierarchy responses for different users are not shared.""" + user2 = self.register_user("user2", "pass") + + # Make the room within the space invite-only. + self.helper.send_state( + self.room, + event_type=EventTypes.JoinRules, + body={"join_rule": JoinRules.INVITE}, + tok=self.token, + ) + + # Run two `get_room_hierarchy` calls for different users up until they block. + deferred1 = ensureDeferred( + self.handler.get_room_hierarchy(create_requester(self.user), self.space) + ) + deferred2 = ensureDeferred( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) + + # Complete the two calls. + result1 = self.get_success(deferred1) + result2 = self.get_success(deferred2) + + # The `get_room_hierarchy` calls should return different results. + self._assert_hierarchy(result1, [(self.space, [self.room]), (self.room, ())]) + self._assert_hierarchy(result2, [(self.space, [self.room])]) + def _create_room_with_join_rule( self, join_rule: str, room_version: Optional[str] = None, **extra_content ) -> str: @@ -410,7 +481,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): ] self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) def test_complex_space(self): @@ -452,7 +525,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): self._assert_rooms(result, expected) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -467,7 +540,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): room_ids.append(self.room) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, limit=7) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, limit=7 + ) ) # The result should have the space and all of the links, plus some of the # rooms and a pagination token. 
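`test_room_hierarchy_cache` above asserts that two concurrent hierarchy requests share literally the same response object while a later request recomputes. That deduplication can be sketched with a tiny in-flight cache; this is an asyncio stand-in, not Synapse's response cache:

```python
import asyncio
from typing import Awaitable, Callable, Dict, Hashable


class InFlightCache:
    """Share one computation between concurrent callers with the same key."""

    def __init__(self) -> None:
        self._pending: Dict[Hashable, "asyncio.Task"] = {}

    async def wrap(self, key: Hashable, factory: Callable[[], Awaitable[dict]]) -> dict:
        task = self._pending.get(key)
        if task is None or task.done():
            task = asyncio.ensure_future(factory())
            self._pending[key] = task
            # Drop the entry once finished so later calls start afresh.
            task.add_done_callback(lambda _: self._pending.pop(key, None))
        return await task


async def main() -> None:
    cache = InFlightCache()

    async def compute() -> dict:
        await asyncio.sleep(0.01)
        return {"rooms": ["!space:test"]}

    key = ("@user:test", "!space:test")
    r1, r2 = await asyncio.gather(cache.wrap(key, compute), cache.wrap(key, compute))
    assert r1 is r2      # concurrent requests shared one result object
    r3 = await cache.wrap(key, compute)
    assert r3 is not r1  # a later request is recomputed


asyncio.run(main())
```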
@@ -479,7 +554,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # Check the next page. result = self.get_success( self.handler.get_room_hierarchy( - self.user, self.space, limit=5, from_token=result["next_batch"] + create_requester(self.user), + self.space, + limit=5, + from_token=result["next_batch"], ) ) # The result should have the space and the room in it, along with a link @@ -499,20 +577,22 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): room_ids.append(self.room) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, limit=7) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, limit=7 + ) ) self.assertIn("next_batch", result) # Changing the room ID, suggested-only, or max-depth causes an error. self.get_failure( self.handler.get_room_hierarchy( - self.user, self.room, from_token=result["next_batch"] + create_requester(self.user), self.room, from_token=result["next_batch"] ), SynapseError, ) self.get_failure( self.handler.get_room_hierarchy( - self.user, + create_requester(self.user), self.space, suggested_only=True, from_token=result["next_batch"], @@ -521,14 +601,19 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): ) self.get_failure( self.handler.get_room_hierarchy( - self.user, self.space, max_depth=0, from_token=result["next_batch"] + create_requester(self.user), + self.space, + max_depth=0, + from_token=result["next_batch"], ), SynapseError, ) # An invalid token is ignored. self.get_failure( - self.handler.get_room_hierarchy(self.user, self.space, from_token="foo"), + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, from_token="foo" + ), SynapseError, ) @@ -554,14 +639,18 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # Test just the space itself. result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, max_depth=0) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, max_depth=0 + ) ) expected: List[Tuple[str, Iterable[str]]] = [(spaces[0], [rooms[0], spaces[1]])] self._assert_hierarchy(result, expected) # A single additional layer. result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, max_depth=1) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, max_depth=1 + ) ) expected += [ (rooms[0], ()), @@ -571,7 +660,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # A few layers. 
result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, max_depth=3) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, max_depth=3 + ) ) expected += [ (rooms[1], ()), @@ -602,7 +693,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): self._assert_rooms(result, expected) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -684,7 +775,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): new=summarize_remote_room_hierarchy, ): result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -851,7 +942,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): new=summarize_remote_room_hierarchy, ): result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -909,7 +1000,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): new=summarize_remote_room_hierarchy, ): result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py index 1f9a2f9b1d..c8cc21cadd 100644 --- a/tests/http/test_endpoint.py +++ b/tests/http/test_endpoint.py @@ -36,8 +36,11 @@ class ServerNameTestCase(unittest.TestCase): "localhost:http", # non-numeric port "1234]", # smells like ipv6 literal but isn't "[1234", + "[1.2.3.4]", "underscore_.com", "percent%65.com", + "newline.com\n", + ".empty-label.com", "1234:5678:80", # too many colons ] for i in test_data: diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 90f800e564..f8cba7b645 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -128,6 +128,7 @@ class EmailPusherTests(HomeserverTestCase): ) self.auth_handler = hs.get_auth_handler() + self.store = hs.get_datastore() def test_need_validated_email(self): """Test that we can only add an email pusher if the user has validated @@ -408,13 +409,7 @@ class EmailPusherTests(HomeserverTestCase): self.hs.get_datastore().db_pool.updates._all_done = False # Now let's actually drive the updates to completion - while not self.get_success( - self.hs.get_datastore().db_pool.updates.has_completed_background_updates() - ): - self.get_success( - self.hs.get_datastore().db_pool.updates.do_next_background_update(100), - by=0.1, - ) + self.wait_for_background_updates() # Check that all pushers with unlinked addresses were deleted pushers = self.get_success( diff --git a/tests/replication/test_sharded_event_persister.py b/tests/replication/test_sharded_event_persister.py index 0a6e4795ee..596ba5a0c9 100644 --- a/tests/replication/test_sharded_event_persister.py +++ b/tests/replication/test_sharded_event_persister.py @@ -17,6 +17,7 @@ from unittest.mock import patch from synapse.api.room_versions import RoomVersion from synapse.rest import admin from synapse.rest.client import login, room, sync +from synapse.storage.util.id_generators import MultiWriterIdGenerator from tests.replication._base import BaseMultiWorkerStreamTestCase from tests.server import make_request @@ -193,7 +194,10 @@ 
class EventPersisterShardTestCase(BaseMultiWorkerStreamTestCase): # # Worker2's event stream position will not advance until we call # __aexit__ again. - actx = worker_hs2.get_datastore()._stream_id_gen.get_next() + worker_store2 = worker_hs2.get_datastore() + assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) + + actx = worker_store2._stream_id_gen.get_next() self.get_success(actx.__aenter__()) response = self.helper.send(room_id1, body="Hi!", tok=self.other_access_token) diff --git a/tests/rest/admin/test_background_updates.py b/tests/rest/admin/test_background_updates.py index 78c48db552..62f242baf6 100644 --- a/tests/rest/admin/test_background_updates.py +++ b/tests/rest/admin/test_background_updates.py @@ -11,10 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from http import HTTPStatus +from typing import Collection + +from parameterized import parameterized import synapse.rest.admin +from synapse.api.errors import Codes from synapse.rest.client import login from synapse.server import HomeServer +from synapse.storage.background_updates import BackgroundUpdater from tests import unittest @@ -30,6 +36,60 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): self.admin_user = self.register_user("admin", "pass", admin=True) self.admin_user_tok = self.login("admin", "pass") + @parameterized.expand( + [ + ("GET", "/_synapse/admin/v1/background_updates/enabled"), + ("POST", "/_synapse/admin/v1/background_updates/enabled"), + ("GET", "/_synapse/admin/v1/background_updates/status"), + ("POST", "/_synapse/admin/v1/background_updates/start_job"), + ] + ) + def test_requester_is_no_admin(self, method: str, url: str): + """ + If the user is not a server admin, an error 403 is returned. + """ + + self.register_user("user", "pass", admin=False) + other_user_tok = self.login("user", "pass") + + channel = self.make_request( + method, + url, + content={}, + access_token=other_user_tok, + ) + + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. + """ + url = "/_synapse/admin/v1/background_updates/start_job" + + # empty content + channel = self.make_request( + "POST", + url, + content={}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.MISSING_PARAM, channel.json_body["errcode"]) + + # job_name invalid + channel = self.make_request( + "POST", + url, + content={"job_name": "unknown"}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + def _register_bg_update(self): "Adds a bg update but doesn't start it" @@ -60,7 +120,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "/_synapse/admin/v1/background_updates/status", access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) # Background updates should be enabled, but none should be running. 
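The background-update admin endpoints exercised in the tests below can also be driven directly over HTTP. A `requests`-based sketch, with the base URL and access token as placeholders; per the tests, an unknown `job_name` yields HTTP 400 with `M_UNKNOWN` and a non-admin token yields 403 with `M_FORBIDDEN`:

```python
import requests

BASE = "https://homeserver.example.org"  # placeholder homeserver
HEADERS = {"Authorization": "Bearer <admin_access_token>"}  # placeholder admin token

# Is the background updater enabled, and what is it currently running?
print(requests.get(f"{BASE}/_synapse/admin/v1/background_updates/enabled", headers=HEADERS).json())
print(requests.get(f"{BASE}/_synapse/admin/v1/background_updates/status", headers=HEADERS).json())

# Kick off one of the named jobs.
resp = requests.post(
    f"{BASE}/_synapse/admin/v1/background_updates/start_job",
    headers=HEADERS,
    json={"job_name": "populate_stats_process_rooms"},
)
print(resp.status_code, resp.json())
```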
self.assertDictEqual( @@ -75,14 +135,14 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): self._register_bg_update() self.store.db_pool.updates.start_doing_background_updates() - self.reactor.pump([1.0, 1.0]) + self.reactor.pump([1.0, 1.0, 1.0]) channel = self.make_request( "GET", "/_synapse/admin/v1/background_updates/status", access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) # Background updates should be enabled, and one should be running. self.assertDictEqual( @@ -91,9 +151,11 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 1000.0, - "total_item_count": 100, + "total_item_count": ( + BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": True, @@ -114,7 +176,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "/_synapse/admin/v1/background_updates/enabled", access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertDictEqual(channel.json_body, {"enabled": True}) # Disable the BG updates @@ -124,7 +186,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): content={"enabled": False}, access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertDictEqual(channel.json_body, {"enabled": False}) # Advance a bit and get the current status, note this will finish the in @@ -137,16 +199,18 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "/_synapse/admin/v1/background_updates/status", access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertDictEqual( channel.json_body, { "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 1000.0, - "total_item_count": 100, + "total_item_count": ( + BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": False, @@ -162,7 +226,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "/_synapse/admin/v1/background_updates/status", access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) # There should be no change from the previous /status response. 
self.assertDictEqual( @@ -171,9 +235,11 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 1000.0, - "total_item_count": 100, + "total_item_count": ( + BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": False, @@ -188,7 +254,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): content={"enabled": True}, access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertDictEqual(channel.json_body, {"enabled": True}) @@ -199,7 +265,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "/_synapse/admin/v1/background_updates/status", access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) # Background updates should be enabled and making progress. self.assertDictEqual( @@ -208,11 +274,92 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 2000.0, - "total_item_count": 200, + "total_item_count": ( + 2 * BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": True, }, ) + + @parameterized.expand( + [ + ("populate_stats_process_rooms", ["populate_stats_process_rooms"]), + ( + "regenerate_directory", + [ + "populate_user_directory_createtables", + "populate_user_directory_process_rooms", + "populate_user_directory_process_users", + "populate_user_directory_cleanup", + ], + ), + ] + ) + def test_start_backround_job(self, job_name: str, updates: Collection[str]): + """ + Test that background updates add to database and be processed. 
+ + Args: + job_name: name of the job to call with the API + updates: collection of background updates to be started + """ + + # no background update is waiting + self.assertTrue( + self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ) + ) + + channel = self.make_request( + "POST", + "/_synapse/admin/v1/background_updates/start_job", + content={"job_name": job_name}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + + # test that each background update is waiting now + for update in updates: + self.assertFalse( + self.get_success( + self.store.db_pool.updates.has_completed_background_update(update) + ) + ) + + self.wait_for_background_updates() + + # background updates are done + self.assertTrue( + self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ) + ) + + def test_start_backround_job_twice(self): + """Test that adding a background update twice returns an error.""" + + # add the job to the database + self.get_success( + self.store.db_pool.simple_insert( + table="background_updates", + values={ + "update_name": "populate_stats_process_rooms", + "progress_json": "{}", + }, + ) + ) + + channel = self.make_request( + "POST", + "/_synapse/admin/v1/background_updates/start_job", + content={"job_name": "populate_stats_process_rooms"}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index b48fc12e5f..07077aff78 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -2226,6 +2226,234 @@ class MakeRoomAdminTestCase(unittest.HomeserverTestCase): ) + +class BlockRoomTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self._store = hs.get_datastore() + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.other_user_tok = self.login("user", "pass") + + self.room_id = self.helper.create_room_as( + self.other_user, tok=self.other_user_tok + ) + self.url = "/_synapse/admin/v1/rooms/%s/block" + + @parameterized.expand([("PUT",), ("GET",)]) + def test_requester_is_no_admin(self, method: str): + """If the user is not a server admin, an error 403 is returned.""" + + channel = self.make_request( + method, + self.url % self.room_id, + content={}, + access_token=self.other_user_tok, + ) + + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + @parameterized.expand([("PUT",), ("GET",)]) + def test_room_is_not_valid(self, method: str): + """Check that invalid room names return an error 400.""" + + channel = self.make_request( + method, + self.url % "invalidroom", + content={}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual( + "invalidroom is not a legal room ID", + channel.json_body["error"], + ) + + def test_block_is_not_valid(self): + """If parameter `block` is not valid, return an error.""" + + # `block` is not valid + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={"block": "NotBool"}, + access_token=self.admin_user_tok, + ) + + 
self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.BAD_JSON, channel.json_body["errcode"]) + + # `block` is not set + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.MISSING_PARAM, channel.json_body["errcode"]) + + # no content is sent + channel = self.make_request( + "PUT", + self.url % self.room_id, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_JSON, channel.json_body["errcode"]) + + def test_block_room(self): + """Test that blocking a room is successful.""" + + def _request_and_test_block_room(room_id: str) -> None: + self._is_blocked(room_id, expect=False) + channel = self.make_request( + "PUT", + self.url % room_id, + content={"block": True}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertTrue(channel.json_body["block"]) + self._is_blocked(room_id, expect=True) + + # known internal room + _request_and_test_block_room(self.room_id) + + # unknown internal room + _request_and_test_block_room("!unknown:test") + + # unknown remote room + _request_and_test_block_room("!unknown:remote") + + def test_block_room_twice(self): + """Test that blocking a room that is already blocked is successful.""" + + self._is_blocked(self.room_id, expect=False) + for _ in range(2): + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={"block": True}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertTrue(channel.json_body["block"]) + self._is_blocked(self.room_id, expect=True) + + def test_unblock_room(self): + """Test that unblocking a room is successful.""" + + def _request_and_test_unblock_room(room_id: str) -> None: + self._block_room(room_id) + + channel = self.make_request( + "PUT", + self.url % room_id, + content={"block": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertFalse(channel.json_body["block"]) + self._is_blocked(room_id, expect=False) + + # known internal room + _request_and_test_unblock_room(self.room_id) + + # unknown internal room + _request_and_test_unblock_room("!unknown:test") + + # unknown remote room + _request_and_test_unblock_room("!unknown:remote") + + def test_unblock_room_twice(self): + """Test that unblocking a room that is not blocked is successful.""" + + self._block_room(self.room_id) + for _ in range(2): + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={"block": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertFalse(channel.json_body["block"]) + self._is_blocked(self.room_id, expect=False) + + def test_get_blocked_room(self): + """Test getting the status of a blocked room""" + + def _request_blocked_room(room_id: str) -> None: + self._block_room(room_id) + + channel = self.make_request( + "GET", + self.url % room_id, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertTrue(channel.json_body["block"]) + self.assertEqual(self.other_user, channel.json_body["user_id"]) + + # known internal room + _request_blocked_room(self.room_id) + + # 
unknown internal room + _request_blocked_room("!unknown:test") + + # unknown remote room + _request_blocked_room("!unknown:remote") + + def test_get_unblocked_room(self): + """Test getting the status of an unblocked room""" + + def _request_unblocked_room(room_id: str) -> None: + self._is_blocked(room_id, expect=False) + + channel = self.make_request( + "GET", + self.url % room_id, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertFalse(channel.json_body["block"]) + self.assertNotIn("user_id", channel.json_body) + + # known internal room + _request_unblocked_room(self.room_id) + + # unknown internal room + _request_unblocked_room("!unknown:test") + + # unknown remote room + _request_unblocked_room("!unknown:remote") + + def _is_blocked(self, room_id: str, expect: bool = True) -> None: + """Assert that the room is blocked or not""" + d = self._store.is_room_blocked(room_id) + if expect: + self.assertTrue(self.get_success(d)) + else: + self.assertIsNone(self.get_success(d)) + + def _block_room(self, room_id: str) -> None: + """Block a room in the database""" + self.get_success(self._store.block_room(room_id, self.other_user)) + self._is_blocked(room_id, expect=True) + + PURGE_TABLES = [ "current_state_events", "event_backward_extremities", diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index c9fe0f06c2..5011e54563 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1169,14 +1169,14 @@ class UserRestTestCase(unittest.HomeserverTestCase): # regardless of whether password login or SSO is allowed self.admin_user = self.register_user("admin", "pass", admin=True) self.admin_user_tok = self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.admin_user, device_id=None, valid_until_ms=None ) ) self.other_user = self.register_user("user", "pass", displayname="User") self.other_user_token = self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.other_user, device_id=None, valid_until_ms=None ) ) diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index e2fcbdc63a..d8a94f4c12 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from http import HTTPStatus from typing import Optional, Union from twisted.internet.defer import succeed @@ -513,12 +514,26 @@ class RefreshAuthTests(unittest.HomeserverTestCase): self.user_pass = "pass" self.user = self.register_user("test", self.user_pass) + def use_refresh_token(self, refresh_token: str) -> FakeChannel: + """ + Helper that makes a request to use a refresh token. + """ + return self.make_request( + "POST", + "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + {"refresh_token": refresh_token}, + ) + def test_login_issue_refresh_token(self): """ A login response should include a refresh_token only if asked. 
""" # Test login - body = {"type": "m.login.password", "user": "test", "password": self.user_pass} + body = { + "type": "m.login.password", + "user": "test", + "password": self.user_pass, + } login_without_refresh = self.make_request( "POST", "/_matrix/client/r0/login", body @@ -528,8 +543,8 @@ class RefreshAuthTests(unittest.HomeserverTestCase): login_with_refresh = self.make_request( "POST", - "/_matrix/client/r0/login?org.matrix.msc2918.refresh_token=true", - body, + "/_matrix/client/r0/login", + {"org.matrix.msc2918.refresh_token": True, **body}, ) self.assertEqual(login_with_refresh.code, 200, login_with_refresh.result) self.assertIn("refresh_token", login_with_refresh.json_body) @@ -555,11 +570,12 @@ class RefreshAuthTests(unittest.HomeserverTestCase): register_with_refresh = self.make_request( "POST", - "/_matrix/client/r0/register?org.matrix.msc2918.refresh_token=true", + "/_matrix/client/r0/register", { "username": "test3", "password": self.user_pass, "auth": {"type": LoginType.DUMMY}, + "org.matrix.msc2918.refresh_token": True, }, ) self.assertEqual(register_with_refresh.code, 200, register_with_refresh.result) @@ -570,10 +586,15 @@ class RefreshAuthTests(unittest.HomeserverTestCase): """ A refresh token can be used to issue a new access token. """ - body = {"type": "m.login.password", "user": "test", "password": self.user_pass} + body = { + "type": "m.login.password", + "user": "test", + "password": self.user_pass, + "org.matrix.msc2918.refresh_token": True, + } login_response = self.make_request( "POST", - "/_matrix/client/r0/login?org.matrix.msc2918.refresh_token=true", + "/_matrix/client/r0/login", body, ) self.assertEqual(login_response.code, 200, login_response.result) @@ -598,15 +619,20 @@ class RefreshAuthTests(unittest.HomeserverTestCase): refresh_response.json_body["refresh_token"], ) - @override_config({"access_token_lifetime": "1m"}) - def test_refresh_token_expiration(self): + @override_config({"refreshable_access_token_lifetime": "1m"}) + def test_refreshable_access_token_expiration(self): """ The access token should have some time as specified in the config. 
""" - body = {"type": "m.login.password", "user": "test", "password": self.user_pass} + body = { + "type": "m.login.password", + "user": "test", + "password": self.user_pass, + "org.matrix.msc2918.refresh_token": True, + } login_response = self.make_request( "POST", - "/_matrix/client/r0/login?org.matrix.msc2918.refresh_token=true", + "/_matrix/client/r0/login", body, ) self.assertEqual(login_response.code, 200, login_response.result) @@ -623,6 +649,128 @@ class RefreshAuthTests(unittest.HomeserverTestCase): self.assertApproximates( refresh_response.json_body["expires_in_ms"], 60 * 1000, 100 ) + access_token = refresh_response.json_body["access_token"] + + # Advance 59 seconds in the future (just shy of 1 minute, the time of expiry) + self.reactor.advance(59.0) + # Check that our token is valid + self.assertEqual( + self.make_request( + "GET", "/_matrix/client/v3/account/whoami", access_token=access_token + ).code, + HTTPStatus.OK, + ) + + # Advance 2 more seconds (just past the time of expiry) + self.reactor.advance(2.0) + # Check that our token is invalid + self.assertEqual( + self.make_request( + "GET", "/_matrix/client/v3/account/whoami", access_token=access_token + ).code, + HTTPStatus.UNAUTHORIZED, + ) + + @override_config( + {"refreshable_access_token_lifetime": "1m", "refresh_token_lifetime": "2m"} + ) + def test_refresh_token_expiry(self): + """ + The refresh token can be configured to have a limited lifetime. + When that lifetime has ended, the refresh token can no longer be used to + refresh the session. + """ + + body = { + "type": "m.login.password", + "user": "test", + "password": self.user_pass, + "org.matrix.msc2918.refresh_token": True, + } + login_response = self.make_request( + "POST", + "/_matrix/client/r0/login", + body, + ) + self.assertEqual(login_response.code, HTTPStatus.OK, login_response.result) + refresh_token1 = login_response.json_body["refresh_token"] + + # Advance 119 seconds in the future (just shy of 2 minutes) + self.reactor.advance(119.0) + + # Refresh our session. The refresh token should still JUST be valid right now. + # By doing so, we get a new access token and a new refresh token. + refresh_response = self.use_refresh_token(refresh_token1) + self.assertEqual(refresh_response.code, HTTPStatus.OK, refresh_response.result) + self.assertIn( + "refresh_token", + refresh_response.json_body, + "No new refresh token returned after refresh.", + ) + refresh_token2 = refresh_response.json_body["refresh_token"] + + # Advance 121 seconds in the future (just a bit more than 2 minutes) + self.reactor.advance(121.0) + + # Try to refresh our session, but instead notice that the refresh token is + # not valid (it just expired). + refresh_response = self.use_refresh_token(refresh_token2) + self.assertEqual( + refresh_response.code, HTTPStatus.FORBIDDEN, refresh_response.result + ) + + @override_config( + { + "refreshable_access_token_lifetime": "2m", + "refresh_token_lifetime": "2m", + "session_lifetime": "3m", + } + ) + def test_ultimate_session_expiry(self): + """ + The session can be configured to have an ultimate, limited lifetime. 
+ """ + + body = { + "type": "m.login.password", + "user": "test", + "password": self.user_pass, + "org.matrix.msc2918.refresh_token": True, + } + login_response = self.make_request( + "POST", + "/_matrix/client/r0/login", + body, + ) + self.assertEqual(login_response.code, 200, login_response.result) + refresh_token = login_response.json_body["refresh_token"] + + # Advance shy of 2 minutes into the future + self.reactor.advance(119.0) + + # Refresh our session. The refresh token should still be valid right now. + refresh_response = self.use_refresh_token(refresh_token) + self.assertEqual(refresh_response.code, 200, refresh_response.result) + self.assertIn( + "refresh_token", + refresh_response.json_body, + "No new refresh token returned after refresh.", + ) + # Notice that our access token lifetime has been diminished to match the + # session lifetime. + # 3 minutes - 119 seconds = 61 seconds. + self.assertEqual(refresh_response.json_body["expires_in_ms"], 61_000) + refresh_token = refresh_response.json_body["refresh_token"] + + # Advance 61 seconds into the future. Our session should have expired + # now, because we've had our 3 minutes. + self.reactor.advance(61.0) + + # Try to issue a new, refreshed, access token. + # This should fail because the refresh token's lifetime has also been + # diminished as our session expired. + refresh_response = self.use_refresh_token(refresh_token) + self.assertEqual(refresh_response.code, 403, refresh_response.result) def test_refresh_token_invalidation(self): """Refresh tokens are invalidated after first use of the next token. @@ -640,10 +788,15 @@ class RefreshAuthTests(unittest.HomeserverTestCase): |-> fourth_refresh (fails) """ - body = {"type": "m.login.password", "user": "test", "password": self.user_pass} + body = { + "type": "m.login.password", + "user": "test", + "password": self.user_pass, + "org.matrix.msc2918.refresh_token": True, + } login_response = self.make_request( "POST", - "/_matrix/client/r0/login?org.matrix.msc2918.refresh_token=true", + "/_matrix/client/r0/login", body, ) self.assertEqual(login_response.code, 200, login_response.result) diff --git a/tests/rest/client/test_capabilities.py b/tests/rest/client/test_capabilities.py index b9e3602552..249808b031 100644 --- a/tests/rest/client/test_capabilities.py +++ b/tests/rest/client/test_capabilities.py @@ -71,7 +71,7 @@ class CapabilitiesTestCase(unittest.HomeserverTestCase): @override_config({"password_config": {"localdb_enabled": False}}) def test_get_change_password_capabilities_localdb_disabled(self): access_token = self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user, device_id=None, valid_until_ms=None ) ) @@ -85,7 +85,7 @@ class CapabilitiesTestCase(unittest.HomeserverTestCase): @override_config({"password_config": {"enabled": False}}) def test_get_change_password_capabilities_password_disabled(self): access_token = self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user, device_id=None, valid_until_ms=None ) ) @@ -174,7 +174,7 @@ class CapabilitiesTestCase(unittest.HomeserverTestCase): @override_config({"experimental_features": {"msc3244_enabled": False}}) def test_get_does_not_include_msc3244_fields_when_disabled(self): access_token = self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user, device_id=None, valid_until_ms=None ) ) @@ -189,7 +189,7 @@ 
class CapabilitiesTestCase(unittest.HomeserverTestCase): def test_get_does_include_msc3244_fields_when_enabled(self): access_token = self.get_success( - self.auth_handler.get_access_token_for_user_id( + self.auth_handler.create_access_token_for_user_id( self.user, device_id=None, valid_until_ms=None ) ) diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index 0b90e3f803..19f5e46537 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -815,13 +815,20 @@ class JWTTestCase(unittest.HomeserverTestCase): jwt_secret = "secret" jwt_algorithm = "HS256" + base_config = { + "enabled": True, + "secret": jwt_secret, + "algorithm": jwt_algorithm, + } - def make_homeserver(self, reactor, clock): - self.hs = self.setup_test_homeserver() - self.hs.config.jwt.jwt_enabled = True - self.hs.config.jwt.jwt_secret = self.jwt_secret - self.hs.config.jwt.jwt_algorithm = self.jwt_algorithm - return self.hs + def default_config(self): + config = super().default_config() + + # If jwt_config has been defined (eg via @override_config), don't replace it. + if config.get("jwt_config") is None: + config["jwt_config"] = self.base_config + + return config def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_secret) -> str: # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str. @@ -879,16 +886,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual(channel.json_body["error"], "Invalid JWT") - @override_config( - { - "jwt_config": { - "jwt_enabled": True, - "secret": jwt_secret, - "algorithm": jwt_algorithm, - "issuer": "test-issuer", - } - } - ) + @override_config({"jwt_config": {**base_config, "issuer": "test-issuer"}}) def test_login_iss(self): """Test validating the issuer claim.""" # A valid issuer. @@ -919,16 +917,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"200", channel.result) self.assertEqual(channel.json_body["user_id"], "@kermit:test") - @override_config( - { - "jwt_config": { - "jwt_enabled": True, - "secret": jwt_secret, - "algorithm": jwt_algorithm, - "audiences": ["test-audience"], - } - } - ) + @override_config({"jwt_config": {**base_config, "audiences": ["test-audience"]}}) def test_login_aud(self): """Test validating the audience claim.""" # A valid audience. 
@@ -962,6 +951,19 @@ class JWTTestCase(unittest.HomeserverTestCase): channel.json_body["error"], "JWT validation failed: Invalid audience" ) + def test_login_default_sub(self): + """Test reading user ID from the default subject claim.""" + channel = self.jwt_login({"sub": "kermit"}) + self.assertEqual(channel.result["code"], b"200", channel.result) + self.assertEqual(channel.json_body["user_id"], "@kermit:test") + + @override_config({"jwt_config": {**base_config, "subject_claim": "username"}}) + def test_login_custom_sub(self): + """Test reading user ID from a custom subject claim.""" + channel = self.jwt_login({"username": "frog"}) + self.assertEqual(channel.result["code"], b"200", channel.result) + self.assertEqual(channel.json_body["user_id"], "@frog:test") + def test_login_no_token(self): params = {"type": "org.matrix.login.jwt"} channel = self.make_request(b"POST", LOGIN_URL, params) @@ -1024,12 +1026,14 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase): ] ) - def make_homeserver(self, reactor, clock): - self.hs = self.setup_test_homeserver() - self.hs.config.jwt.jwt_enabled = True - self.hs.config.jwt.jwt_secret = self.jwt_pubkey - self.hs.config.jwt.jwt_algorithm = "RS256" - return self.hs + def default_config(self): + config = super().default_config() + config["jwt_config"] = { + "enabled": True, + "secret": self.jwt_pubkey, + "algorithm": "RS256", + } + return config def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str: # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str. diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 78c2fb86b9..eb10d43217 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -1,4 +1,5 @@ # Copyright 2019 New Vector Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,6 +47,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): return config def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.user_id, self.user_token = self._create_user("alice") self.user2_id, self.user2_token = self._create_user("bob") @@ -91,6 +94,49 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) self.assertEquals(400, channel.code, channel.json_body) + def test_deny_invalid_event(self): + """Test that we deny relations on non-existent events""" + channel = self._send_relation( + RelationTypes.ANNOTATION, + EventTypes.Message, + parent_id="foo", + content={"body": "foo", "msgtype": "m.text"}, + ) + self.assertEquals(400, channel.code, channel.json_body) + + # Unless that event is referenced from another event! + self.get_success( + self.hs.get_datastore().db_pool.simple_insert( + table="event_relations", + values={ + "event_id": "bar", + "relates_to_id": "foo", + "relation_type": RelationTypes.THREAD, + }, + desc="test_deny_invalid_event", + ) + ) + channel = self._send_relation( + RelationTypes.THREAD, + EventTypes.Message, + parent_id="foo", + content={"body": "foo", "msgtype": "m.text"}, + ) + self.assertEquals(200, channel.code, channel.json_body) + + def test_deny_invalid_room(self): + """Test that we deny relations on events in other rooms""" + # Create another room and send a message in it. 
+ room2 = self.helper.create_room_as(self.user_id, tok=self.user_token) + res = self.helper.send(room2, body="Hi!", tok=self.user_token) + parent_id = res["event_id"] + + # Attempt to send an annotation to that event. + channel = self._send_relation( + RelationTypes.ANNOTATION, "m.reaction", parent_id=parent_id, key="A" + ) + self.assertEquals(400, channel.code, channel.json_body) + def test_deny_double_react(self): """Test that we deny relations on membership events""" channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") @@ -99,6 +145,25 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") self.assertEquals(400, channel.code, channel.json_body) + def test_deny_forked_thread(self): + """It is invalid to start a thread off a thread.""" + channel = self._send_relation( + RelationTypes.THREAD, + "m.room.message", + content={"msgtype": "m.text", "body": "foo"}, + parent_id=self.parent_id, + ) + self.assertEquals(200, channel.code, channel.json_body) + parent_id = channel.json_body["event_id"] + + channel = self._send_relation( + RelationTypes.THREAD, + "m.room.message", + content={"msgtype": "m.text", "body": "foo"}, + parent_id=parent_id, + ) + self.assertEquals(400, channel.code, channel.json_body) + def test_basic_paginate_relations(self): """Tests that calling pagination API correctly the latest relations.""" channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") @@ -703,6 +768,52 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertIn("chunk", channel.json_body) self.assertEquals(channel.json_body["chunk"], []) + def test_unknown_relations(self): + """Unknown relations should be accepted.""" + channel = self._send_relation("m.relation.test", "m.room.test") + self.assertEquals(200, channel.code, channel.json_body) + event_id = channel.json_body["event_id"] + + channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/relations/%s?limit=1" + % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + + # We expect to get back a single pagination result, which is the full + # relation event we sent above. + self.assertEquals(len(channel.json_body["chunk"]), 1, channel.json_body) + self.assert_dict( + {"event_id": event_id, "sender": self.user_id, "type": "m.room.test"}, + channel.json_body["chunk"][0], + ) + + # We also expect to get the original event (the id of which is self.parent_id) + self.assertEquals( + channel.json_body["original_event"]["event_id"], self.parent_id + ) + + # When bundling the unknown relation is not included. + channel = self.make_request( + "GET", + "/rooms/%s/event/%s" % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertNotIn("m.relations", channel.json_body["unsigned"]) + + # But unknown relations can be directly queried. 
+ channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/%s?limit=1" + % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertEquals(channel.json_body["chunk"], []) + def _send_relation( self, relation_type: str, @@ -749,3 +860,65 @@ class RelationsTestCase(unittest.HomeserverTestCase): access_token = self.login(localpart, "abc123") return user_id, access_token + + def test_background_update(self): + """Test the event_arbitrary_relations background update.""" + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="👍") + self.assertEquals(200, channel.code, channel.json_body) + annotation_event_id_good = channel.json_body["event_id"] + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="A") + self.assertEquals(200, channel.code, channel.json_body) + annotation_event_id_bad = channel.json_body["event_id"] + + channel = self._send_relation(RelationTypes.THREAD, "m.room.test") + self.assertEquals(200, channel.code, channel.json_body) + thread_event_id = channel.json_body["event_id"] + + # Clean-up the table as if the inserts did not happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="event_relations", + column="event_id", + iterable=(annotation_event_id_bad, thread_event_id), + keyvalues={}, + desc="RelationsTestCase.test_background_update", + ) + ) + + # Only the "good" annotation should be found. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/rooms/{self.room}/relations/{self.parent_id}?limit=10", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertEquals( + [ev["event_id"] for ev in channel.json_body["chunk"]], + [annotation_event_id_good], + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + {"update_name": "event_arbitrary_relations", "progress_json": "{}"}, + ) + ) + + # Ugh, have to reset this flag + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # The "good" annotation and the thread should be found, but not the "bad" + # annotation. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/rooms/{self.room}/relations/{self.parent_id}?limit=10", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertCountEqual( + [ev["event_id"] for ev in channel.json_body["chunk"]], + [annotation_event_id_good, thread_event_id], + ) diff --git a/tests/rest/media/v1/test_filepath.py b/tests/rest/media/v1/test_filepath.py index 09504a485f..8fe94f7d85 100644 --- a/tests/rest/media/v1/test_filepath.py +++ b/tests/rest/media/v1/test_filepath.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import inspect +from typing import Iterable + from synapse.rest.media.v1.filepath import MediaFilePaths from tests import unittest @@ -236,3 +239,250 @@ class MediaFilePathsTestCase(unittest.TestCase): "/media_store/url_cache_thumbnails/Ge", ], ) + + def test_server_name_validation(self): + """Test validation of server names""" + self._test_path_validation( + [ + "remote_media_filepath_rel", + "remote_media_filepath", + "remote_media_thumbnail_rel", + "remote_media_thumbnail", + "remote_media_thumbnail_rel_legacy", + "remote_media_thumbnail_dir", + ], + parameter="server_name", + valid_values=[ + "matrix.org", + "matrix.org:8448", + "matrix-federation.matrix.org", + "matrix-federation.matrix.org:8448", + "10.1.12.123", + "10.1.12.123:8448", + "[fd00:abcd::ffff]", + "[fd00:abcd::ffff]:8448", + ], + invalid_values=[ + "/matrix.org", + "matrix.org/..", + "matrix.org\x00", + "", + ".", + "..", + "/", + ], + ) + + def test_file_id_validation(self): + """Test validation of local, remote and legacy URL cache file / media IDs""" + # File / media IDs get split into three parts to form paths, consisting of the + # first two characters, next two characters and rest of the ID. + valid_file_ids = [ + "GerZNDnDZVjsOtardLuwfIBg", + # Unexpected, but produces an acceptable path: + "GerZN", # "N" becomes the last directory + ] + invalid_file_ids = [ + "/erZNDnDZVjsOtardLuwfIBg", + "Ge/ZNDnDZVjsOtardLuwfIBg", + "GerZ/DnDZVjsOtardLuwfIBg", + "GerZ/..", + "G\x00rZNDnDZVjsOtardLuwfIBg", + "Ger\x00NDnDZVjsOtardLuwfIBg", + "GerZNDnDZVjsOtardLuwfIBg\x00", + "", + "Ge", + "GerZ", + "GerZ.", + "..rZNDnDZVjsOtardLuwfIBg", + "Ge..NDnDZVjsOtardLuwfIBg", + "GerZ..", + "GerZ/", + ] + + self._test_path_validation( + [ + "local_media_filepath_rel", + "local_media_filepath", + "local_media_thumbnail_rel", + "local_media_thumbnail", + "local_media_thumbnail_dir", + # Legacy URL cache media IDs + "url_cache_filepath_rel", + "url_cache_filepath", + # `url_cache_filepath_dirs_to_delete` is tested below. + "url_cache_thumbnail_rel", + "url_cache_thumbnail", + "url_cache_thumbnail_directory_rel", + "url_cache_thumbnail_directory", + "url_cache_thumbnail_dirs_to_delete", + ], + parameter="media_id", + valid_values=valid_file_ids, + invalid_values=invalid_file_ids, + ) + + # `url_cache_filepath_dirs_to_delete` ignores what would be the last path + # component, so only the first 4 characters matter. 
+ self._test_path_validation( + [ + "url_cache_filepath_dirs_to_delete", + ], + parameter="media_id", + valid_values=valid_file_ids, + invalid_values=[ + "/erZNDnDZVjsOtardLuwfIBg", + "Ge/ZNDnDZVjsOtardLuwfIBg", + "G\x00rZNDnDZVjsOtardLuwfIBg", + "Ger\x00NDnDZVjsOtardLuwfIBg", + "", + "Ge", + "..rZNDnDZVjsOtardLuwfIBg", + "Ge..NDnDZVjsOtardLuwfIBg", + ], + ) + + self._test_path_validation( + [ + "remote_media_filepath_rel", + "remote_media_filepath", + "remote_media_thumbnail_rel", + "remote_media_thumbnail", + "remote_media_thumbnail_rel_legacy", + "remote_media_thumbnail_dir", + ], + parameter="file_id", + valid_values=valid_file_ids, + invalid_values=invalid_file_ids, + ) + + def test_url_cache_media_id_validation(self): + """Test validation of URL cache media IDs""" + self._test_path_validation( + [ + "url_cache_filepath_rel", + "url_cache_filepath", + # `url_cache_filepath_dirs_to_delete` only cares about the date prefix + "url_cache_thumbnail_rel", + "url_cache_thumbnail", + "url_cache_thumbnail_directory_rel", + "url_cache_thumbnail_directory", + "url_cache_thumbnail_dirs_to_delete", + ], + parameter="media_id", + valid_values=[ + "2020-01-02_GerZNDnDZVjsOtar", + "2020-01-02_G", # Unexpected, but produces an acceptable path + ], + invalid_values=[ + "2020-01-02", + "2020-01-02-", + "2020-01-02-.", + "2020-01-02-..", + "2020-01-02-/", + "2020-01-02-/GerZNDnDZVjsOtar", + "2020-01-02-GerZNDnDZVjsOtar/..", + "2020-01-02-GerZNDnDZVjsOtar\x00", + ], + ) + + def test_content_type_validation(self): + """Test validation of thumbnail content types""" + self._test_path_validation( + [ + "local_media_thumbnail_rel", + "local_media_thumbnail", + "remote_media_thumbnail_rel", + "remote_media_thumbnail", + "remote_media_thumbnail_rel_legacy", + "url_cache_thumbnail_rel", + "url_cache_thumbnail", + ], + parameter="content_type", + valid_values=[ + "image/jpeg", + ], + invalid_values=[ + "", # ValueError: not enough values to unpack + "image/jpeg/abc", # ValueError: too many values to unpack + "image/jpeg\x00", + ], + ) + + def test_thumbnail_method_validation(self): + """Test validation of thumbnail methods""" + self._test_path_validation( + [ + "local_media_thumbnail_rel", + "local_media_thumbnail", + "remote_media_thumbnail_rel", + "remote_media_thumbnail", + "url_cache_thumbnail_rel", + "url_cache_thumbnail", + ], + parameter="method", + valid_values=[ + "crop", + "scale", + ], + invalid_values=[ + "/scale", + "scale/..", + "scale\x00", + "/", + ], + ) + + def _test_path_validation( + self, + methods: Iterable[str], + parameter: str, + valid_values: Iterable[str], + invalid_values: Iterable[str], + ): + """Test that the specified methods validate the named parameter as expected + + Args: + methods: The names of `MediaFilePaths` methods to test + parameter: The name of the parameter to test + valid_values: A list of parameter values that are expected to be accepted + invalid_values: A list of parameter values that are expected to be rejected + + Raises: + AssertionError: If a value was accepted when it should have failed + validation. + ValueError: If a value failed validation when it should have been accepted. 
+ """ + for method in methods: + get_path = getattr(self.filepaths, method) + + parameters = inspect.signature(get_path).parameters + kwargs = { + "server_name": "matrix.org", + "media_id": "GerZNDnDZVjsOtardLuwfIBg", + "file_id": "GerZNDnDZVjsOtardLuwfIBg", + "width": 800, + "height": 600, + "content_type": "image/jpeg", + "method": "scale", + } + + if get_path.__name__.startswith("url_"): + kwargs["media_id"] = "2020-01-02_GerZNDnDZVjsOtar" + + kwargs = {k: v for k, v in kwargs.items() if k in parameters} + kwargs.pop(parameter) + + for value in valid_values: + kwargs[parameter] = value + get_path(**kwargs) + # No exception should be raised + + for value in invalid_values: + with self.assertRaises(ValueError): + kwargs[parameter] = value + path_or_list = get_path(**kwargs) + self.fail( + f"{value!r} unexpectedly passed validation: " + f"{method} returned {path_or_list!r}" + ) diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py index 4b67bd15b7..36c933b9e9 100644 --- a/tests/storage/databases/main/test_deviceinbox.py +++ b/tests/storage/databases/main/test_deviceinbox.py @@ -66,7 +66,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): self.store.db_pool.simple_insert( "background_updates", { - "update_name": "remove_deleted_devices_from_device_inbox", + "update_name": "remove_dead_devices_from_device_inbox", "progress_json": "{}", }, ) @@ -140,7 +140,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): self.store.db_pool.simple_insert( "background_updates", { - "update_name": "remove_hidden_devices_from_device_inbox", + "update_name": "remove_dead_devices_from_device_inbox", "progress_json": "{}", }, ) diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index a649e8c618..5ae491ff5a 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -12,11 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import json +from contextlib import contextmanager +from typing import Generator +from twisted.enterprise.adbapi import ConnectionPool +from twisted.internet.defer import ensureDeferred +from twisted.test.proto_helpers import MemoryReactor + +from synapse.api.room_versions import EventFormatVersions, RoomVersions from synapse.logging.context import LoggingContext from synapse.rest import admin from synapse.rest.client import login, room -from synapse.storage.databases.main.events_worker import EventsWorkerStore +from synapse.server import HomeServer +from synapse.storage.databases.main.events_worker import ( + EVENT_QUEUE_THREADS, + EventsWorkerStore, +) +from synapse.storage.types import Connection +from synapse.util import Clock from synapse.util.async_helpers import yieldable_gather_results from tests import unittest @@ -144,3 +157,127 @@ class EventCacheTestCase(unittest.HomeserverTestCase): # We should have fetched the event from the DB self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) + + +class DatabaseOutageTestCase(unittest.HomeserverTestCase): + """Test event fetching during a database outage.""" + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer): + self.store: EventsWorkerStore = hs.get_datastore() + + self.room_id = f"!room:{hs.hostname}" + self.event_ids = [f"event{i}" for i in range(20)] + + self._populate_events() + + def _populate_events(self) -> None: + """Ensure that there are test events in the database. + + When testing with the in-memory SQLite database, all the events are lost during + the simulated outage. + + To ensure consistency between `room_id`s and `event_id`s before and after the + outage, rows are built and inserted manually. + + Upserts are used to handle the non-SQLite case where events are not lost. + """ + self.get_success( + self.store.db_pool.simple_upsert( + "rooms", + {"room_id": self.room_id}, + {"room_version": RoomVersions.V4.identifier}, + ) + ) + + self.event_ids = [f"event{i}" for i in range(20)] + for idx, event_id in enumerate(self.event_ids): + self.get_success( + self.store.db_pool.simple_upsert( + "events", + {"event_id": event_id}, + { + "event_id": event_id, + "room_id": self.room_id, + "topological_ordering": idx, + "stream_ordering": idx, + "type": "test", + "processed": True, + "outlier": False, + }, + ) + ) + self.get_success( + self.store.db_pool.simple_upsert( + "event_json", + {"event_id": event_id}, + { + "room_id": self.room_id, + "json": json.dumps({"type": "test", "room_id": self.room_id}), + "internal_metadata": "{}", + "format_version": EventFormatVersions.V3, + }, + ) + ) + + @contextmanager + def _outage(self) -> Generator[None, None, None]: + """Simulate a database outage. + + Returns: + A context manager. While the context is active, any attempts to connect to + the database will fail. + """ + connection_pool = self.store.db_pool._db_pool + + # Close all connections and shut down the database `ThreadPool`. + connection_pool.close() + + # Restart the database `ThreadPool`. + connection_pool.start() + + original_connection_factory = connection_pool.connectionFactory + + def connection_factory(_pool: ConnectionPool) -> Connection: + raise Exception("Could not connect to the database.") + + connection_pool.connectionFactory = connection_factory # type: ignore[assignment] + try: + yield + finally: + connection_pool.connectionFactory = original_connection_factory + + # If the in-memory SQLite database is being used, all the events are gone. + # Restore the test data. 
+ self._populate_events() + + def test_failure(self) -> None: + """Test that event fetches do not get stuck during a database outage.""" + with self._outage(): + failure = self.get_failure( + self.store.get_event(self.event_ids[0]), Exception + ) + self.assertEqual(str(failure.value), "Could not connect to the database.") + + def test_recovery(self) -> None: + """Test that event fetchers recover after a database outage.""" + with self._outage(): + # Kick off a bunch of event fetches but do not pump the reactor + event_deferreds = [] + for event_id in self.event_ids: + event_deferreds.append(ensureDeferred(self.store.get_event(event_id))) + + # We should have maxed out on event fetcher threads + self.assertEqual(self.store._event_fetch_ongoing, EVENT_QUEUE_THREADS) + + # All the event fetchers will fail + self.pump() + self.assertEqual(self.store._event_fetch_ongoing, 0) + + for event_deferred in event_deferreds: + failure = self.get_failure(event_deferred, Exception) + self.assertEqual( + str(failure.value), "Could not connect to the database." + ) + + # This next event fetch should succeed + self.get_success(self.store.get_event(self.event_ids[0])) diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index 0da42b5ac5..216d816d56 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -1,8 +1,11 @@ -from unittest.mock import Mock +from mock import Mock + +from twisted.internet.defer import Deferred, ensureDeferred from synapse.storage.background_updates import BackgroundUpdater from tests import unittest +from tests.test_utils import make_awaitable class BackgroundUpdateTestCase(unittest.HomeserverTestCase): @@ -19,11 +22,11 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): ) def test_do_background_update(self): - # the time we claim each update takes - duration_ms = 42 + # the time we claim it takes to update one item when running the update + duration_ms = 10 # the target runtime for each bg update - target_background_update_duration_ms = 50000 + target_background_update_duration_ms = 100 store = self.hs.get_datastore() self.get_success( @@ -48,16 +51,14 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): self.update_handler.side_effect = update self.update_handler.reset_mock() res = self.get_success( - self.updates.do_next_background_update( - target_background_update_duration_ms - ), - by=0.1, + self.updates.do_next_background_update(False), + by=0.01, ) self.assertFalse(res) # on the first call, we should get run with the default background update size self.update_handler.assert_called_once_with( - {"my_key": 1}, self.updates.DEFAULT_BACKGROUND_BATCH_SIZE + {"my_key": 1}, self.updates.MINIMUM_BACKGROUND_BATCH_SIZE ) # second step: complete the update @@ -74,16 +75,93 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): self.update_handler.side_effect = update self.update_handler.reset_mock() - result = self.get_success( - self.updates.do_next_background_update(target_background_update_duration_ms) - ) + result = self.get_success(self.updates.do_next_background_update(False)) self.assertFalse(result) self.update_handler.assert_called_once() # third step: we don't expect to be called any more self.update_handler.reset_mock() - result = self.get_success( - self.updates.do_next_background_update(target_background_update_duration_ms) - ) + result = self.get_success(self.updates.do_next_background_update(False)) self.assertTrue(result) 
self.assertFalse(self.update_handler.called) + + +class BackgroundUpdateControllerTestCase(unittest.HomeserverTestCase): + def prepare(self, reactor, clock, homeserver): + self.updates: BackgroundUpdater = self.hs.get_datastore().db_pool.updates + # the base test class should have run the real bg updates for us + self.assertTrue( + self.get_success(self.updates.has_completed_background_updates()) + ) + + self.update_deferred = Deferred() + self.update_handler = Mock(return_value=self.update_deferred) + self.updates.register_background_update_handler( + "test_update", self.update_handler + ) + + # Mock out the AsyncContextManager + self._update_ctx_manager = Mock(spec=["__aenter__", "__aexit__"]) + self._update_ctx_manager.__aenter__ = Mock( + return_value=make_awaitable(None), + ) + self._update_ctx_manager.__aexit__ = Mock(return_value=make_awaitable(None)) + + # Mock out the `update_handler` callback + self._on_update = Mock(return_value=self._update_ctx_manager) + + # Define a default batch size value that's not the same as the internal default + # value (100). + self._default_batch_size = 500 + + # Register the callbacks with more mocks + self.hs.get_module_api().register_background_update_controller_callbacks( + on_update=self._on_update, + min_batch_size=Mock(return_value=make_awaitable(self._default_batch_size)), + default_batch_size=Mock( + return_value=make_awaitable(self._default_batch_size), + ), + ) + + def test_controller(self): + store = self.hs.get_datastore() + self.get_success( + store.db_pool.simple_insert( + "background_updates", + values={"update_name": "test_update", "progress_json": "{}"}, + ) + ) + + # Set the return value for the context manager. + enter_defer = Deferred() + self._update_ctx_manager.__aenter__ = Mock(return_value=enter_defer) + + # Start the background update. + do_update_d = ensureDeferred(self.updates.do_next_background_update(True)) + + self.pump() + + # `run_update` should have been called, but the update handler won't be + # called until the `enter_defer` (returned by `__aenter__`) is resolved. + self._on_update.assert_called_once_with( + "test_update", + "master", + False, + ) + self.assertFalse(do_update_d.called) + self.assertFalse(self.update_deferred.called) + + # Resolving the `enter_defer` should call the update handler, which then + # blocks. 
+ enter_defer.callback(100) + self.pump() + self.update_handler.assert_called_once_with({}, self._default_batch_size) + self.assertFalse(self.update_deferred.called) + self._update_ctx_manager.__aexit__.assert_not_called() + + # Resolving the update handler deferred should cause the + # `do_next_background_update` to finish and return + self.update_deferred.callback(100) + self.pump() + self._update_ctx_manager.__aexit__.assert_called() + self.get_success(do_update_d) diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index b31c5eb5ec..7b7f6c349e 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -664,7 +664,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): ): iterations += 1 self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 + self.store.db_pool.updates.do_next_background_update(False), by=0.1 ) # Ensure that we did actually take multiple iterations to process the @@ -723,7 +723,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): ): iterations += 1 self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 + self.store.db_pool.updates.do_next_background_update(False), by=0.1 ) # Ensure that we did actually take multiple iterations to process the diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py index a6be9a1bb1..cfc8098af6 100644 --- a/tests/storage/test_rollback_worker.py +++ b/tests/storage/test_rollback_worker.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import List +from unittest import mock + from synapse.app.generic_worker import GenericWorkerServer from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.prepare_database import PrepareDatabaseException, prepare_database @@ -19,6 +22,22 @@ from synapse.storage.schema import SCHEMA_VERSION from tests.unittest import HomeserverTestCase +def fake_listdir(filepath: str) -> List[str]: + """ + A fake implementation of os.listdir which we can use to mock out the filesystem. + + Args: + filepath: The directory to list files for. + + Returns: + A list of files and folders in the directory. + """ + if filepath.endswith("full_schemas"): + return [str(SCHEMA_VERSION)] + + return ["99_add_unicorn_to_database.sql"] + + class WorkerSchemaTests(HomeserverTestCase): def make_homeserver(self, reactor, clock): hs = self.setup_test_homeserver( @@ -51,7 +70,7 @@ class WorkerSchemaTests(HomeserverTestCase): prepare_database(db_conn, db_pool.engine, self.hs.config) - def test_not_upgraded(self): + def test_not_upgraded_old_schema_version(self): """Test that workers don't start if the DB has an older schema version""" db_pool = self.hs.get_datastore().db_pool db_conn = LoggingDatabaseConnection( @@ -67,3 +86,34 @@ class WorkerSchemaTests(HomeserverTestCase): with self.assertRaises(PrepareDatabaseException): prepare_database(db_conn, db_pool.engine, self.hs.config) + + def test_not_upgraded_current_schema_version_with_outstanding_deltas(self): + """ + Test that workers don't start if the DB is on the current schema version, + but there are still outstanding delta migrations to run. 
+ """ + db_pool = self.hs.get_datastore().db_pool + db_conn = LoggingDatabaseConnection( + db_pool._db_pool.connect(), + db_pool.engine, + "tests", + ) + + # Set the schema version of the database to the current version + cur = db_conn.cursor() + cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION,)) + + db_conn.commit() + + # Patch `os.listdir` here to make synapse think that there is a migration + # file ready to be run. + # Note that we can't patch this function for the whole method, else Synapse + # will try to find the file when building the database initially. + with mock.patch("os.listdir", mock.Mock(side_effect=fake_listdir)): + with self.assertRaises(PrepareDatabaseException): + # Synapse should think that there is an outstanding migration file due to + # the patched 'os.listdir'. + # + # We expect Synapse to raise an exception to indicate the master process + # needs to apply this migration file. + prepare_database(db_conn, db_pool.engine, self.hs.config) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 37cf7bb232..7f5b28aed8 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -23,6 +23,7 @@ from synapse.rest import admin from synapse.rest.client import login, register, room from synapse.server import HomeServer from synapse.storage import DataStore +from synapse.storage.background_updates import _BackgroundUpdateHandler from synapse.storage.roommember import ProfileInfo from synapse.util import Clock @@ -391,7 +392,9 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): with mock.patch.dict( self.store.db_pool.updates._background_update_handlers, - populate_user_directory_process_users=mocked_process_users, + populate_user_directory_process_users=_BackgroundUpdateHandler( + mocked_process_users, + ), ): self._purge_and_rebuild_user_dir() diff --git a/tests/unittest.py b/tests/unittest.py index c9a08a3420..eea0903f05 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -331,12 +331,16 @@ class HomeserverTestCase(TestCase): time.sleep(0.01) def wait_for_background_updates(self) -> None: - """Block until all background database updates have completed.""" + """Block until all background database updates have completed. + + Note that callers must ensure there's a store property created on the + testcase. + """ while not self.get_success( self.store.db_pool.updates.has_completed_background_updates() ): self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 + self.store.db_pool.updates.do_next_background_update(False), by=0.1 ) def make_homeserver(self, reactor, clock): @@ -495,8 +499,7 @@ class HomeserverTestCase(TestCase): async def run_bg_updates(): with LoggingContext("run_bg_updates"): - while not await stor.db_pool.updates.has_completed_background_updates(): - await stor.db_pool.updates.do_next_background_update(1) + self.get_success(stor.db_pool.updates.run_background_updates(False)) hs = setup_test_homeserver(self.addCleanup, *args, **kwargs) stor = hs.get_datastore() diff --git a/tests/utils.py b/tests/utils.py index cf8ba5c5db..983859120f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -119,7 +119,6 @@ def default_config(name, parse=False): "enable_registration": True, "enable_registration_captcha": False, "macaroon_secret_key": "not even a little secret", - "trusted_third_party_id_servers": [], "password_providers": [], "worker_replication_url": "", "worker_app": None, |